diff --git a/.buildinfo b/.buildinfo new file mode 100644 index 0000000..bb21d4d --- /dev/null +++ b/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file records the configuration used when building these files. When it is not found, a full rebuild will be done. +config: a41fc0cab2f773448e4ba86f392a4181 +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle new file mode 100644 index 0000000..e166947 Binary files /dev/null and b/.doctrees/environment.pickle differ diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree new file mode 100644 index 0000000..93d9d36 Binary files /dev/null and b/.doctrees/index.doctree differ diff --git a/.doctrees/sources/Diffusers/index.doctree b/.doctrees/sources/Diffusers/index.doctree new file mode 100644 index 0000000..c4f2d4c Binary files /dev/null and b/.doctrees/sources/Diffusers/index.doctree differ diff --git a/.doctrees/sources/Diffusers/install.doctree b/.doctrees/sources/Diffusers/install.doctree new file mode 100644 index 0000000..94910c6 Binary files /dev/null and b/.doctrees/sources/Diffusers/install.doctree differ diff --git a/.doctrees/sources/Diffusers/quick_start.doctree b/.doctrees/sources/Diffusers/quick_start.doctree new file mode 100644 index 0000000..f6dcee1 Binary files /dev/null and b/.doctrees/sources/Diffusers/quick_start.doctree differ diff --git a/.doctrees/sources/accelerate/index.doctree b/.doctrees/sources/accelerate/index.doctree new file mode 100644 index 0000000..456e01c Binary files /dev/null and b/.doctrees/sources/accelerate/index.doctree differ diff --git a/.doctrees/sources/accelerate/install.doctree b/.doctrees/sources/accelerate/install.doctree new file mode 100644 index 0000000..dd1b5b0 Binary files /dev/null and b/.doctrees/sources/accelerate/install.doctree differ diff --git a/.doctrees/sources/accelerate/quick_start.doctree b/.doctrees/sources/accelerate/quick_start.doctree new file mode 100644 index 0000000..30a7b34 Binary files /dev/null and b/.doctrees/sources/accelerate/quick_start.doctree differ diff --git a/.doctrees/sources/ascend/quick_install.doctree b/.doctrees/sources/ascend/quick_install.doctree new file mode 100644 index 0000000..51ab60f Binary files /dev/null and b/.doctrees/sources/ascend/quick_install.doctree differ diff --git a/.doctrees/sources/deepspeed/index.doctree b/.doctrees/sources/deepspeed/index.doctree new file mode 100644 index 0000000..dda885d Binary files /dev/null and b/.doctrees/sources/deepspeed/index.doctree differ diff --git a/.doctrees/sources/deepspeed/install.doctree b/.doctrees/sources/deepspeed/install.doctree new file mode 100644 index 0000000..5b05460 Binary files /dev/null and b/.doctrees/sources/deepspeed/install.doctree differ diff --git a/.doctrees/sources/deepspeed/quick_start.doctree b/.doctrees/sources/deepspeed/quick_start.doctree new file mode 100644 index 0000000..2166966 Binary files /dev/null and b/.doctrees/sources/deepspeed/quick_start.doctree differ diff --git a/.doctrees/sources/llama_cpp/index.doctree b/.doctrees/sources/llama_cpp/index.doctree new file mode 100644 index 0000000..c34d54e Binary files /dev/null and b/.doctrees/sources/llama_cpp/index.doctree differ diff --git a/.doctrees/sources/llama_cpp/install.doctree b/.doctrees/sources/llama_cpp/install.doctree new file mode 100644 index 0000000..3c07d69 Binary files /dev/null and b/.doctrees/sources/llama_cpp/install.doctree differ diff --git a/.doctrees/sources/llama_cpp/quick_start.doctree 
b/.doctrees/sources/llama_cpp/quick_start.doctree new file mode 100644 index 0000000..0d06084 Binary files /dev/null and b/.doctrees/sources/llama_cpp/quick_start.doctree differ diff --git a/.doctrees/sources/llamafactory/example.doctree b/.doctrees/sources/llamafactory/example.doctree new file mode 100644 index 0000000..ecf1f53 Binary files /dev/null and b/.doctrees/sources/llamafactory/example.doctree differ diff --git a/.doctrees/sources/llamafactory/faq.doctree b/.doctrees/sources/llamafactory/faq.doctree new file mode 100644 index 0000000..495c409 Binary files /dev/null and b/.doctrees/sources/llamafactory/faq.doctree differ diff --git a/.doctrees/sources/llamafactory/index.doctree b/.doctrees/sources/llamafactory/index.doctree new file mode 100644 index 0000000..646ff18 Binary files /dev/null and b/.doctrees/sources/llamafactory/index.doctree differ diff --git a/.doctrees/sources/llamafactory/install.doctree b/.doctrees/sources/llamafactory/install.doctree new file mode 100644 index 0000000..ee9cc50 Binary files /dev/null and b/.doctrees/sources/llamafactory/install.doctree differ diff --git a/.doctrees/sources/llamafactory/multi_npu.doctree b/.doctrees/sources/llamafactory/multi_npu.doctree new file mode 100644 index 0000000..4fdf287 Binary files /dev/null and b/.doctrees/sources/llamafactory/multi_npu.doctree differ diff --git a/.doctrees/sources/llamafactory/quick_start.doctree b/.doctrees/sources/llamafactory/quick_start.doctree new file mode 100644 index 0000000..e65e28b Binary files /dev/null and b/.doctrees/sources/llamafactory/quick_start.doctree differ diff --git a/.doctrees/sources/lm_deploy/index.doctree b/.doctrees/sources/lm_deploy/index.doctree new file mode 100644 index 0000000..e5a49eb Binary files /dev/null and b/.doctrees/sources/lm_deploy/index.doctree differ diff --git a/.doctrees/sources/lm_deploy/install.doctree b/.doctrees/sources/lm_deploy/install.doctree new file mode 100644 index 0000000..ec911fb Binary files /dev/null and b/.doctrees/sources/lm_deploy/install.doctree differ diff --git a/.doctrees/sources/lm_deploy/quick_start.doctree b/.doctrees/sources/lm_deploy/quick_start.doctree new file mode 100644 index 0000000..9f8b264 Binary files /dev/null and b/.doctrees/sources/lm_deploy/quick_start.doctree differ diff --git a/.doctrees/sources/lm_evaluation/index.doctree b/.doctrees/sources/lm_evaluation/index.doctree new file mode 100644 index 0000000..03c0db3 Binary files /dev/null and b/.doctrees/sources/lm_evaluation/index.doctree differ diff --git a/.doctrees/sources/lm_evaluation/install.doctree b/.doctrees/sources/lm_evaluation/install.doctree new file mode 100644 index 0000000..991ba0b Binary files /dev/null and b/.doctrees/sources/lm_evaluation/install.doctree differ diff --git a/.doctrees/sources/lm_evaluation/quick_start.doctree b/.doctrees/sources/lm_evaluation/quick_start.doctree new file mode 100644 index 0000000..b91120e Binary files /dev/null and b/.doctrees/sources/lm_evaluation/quick_start.doctree differ diff --git a/.doctrees/sources/onnxruntime/index.doctree b/.doctrees/sources/onnxruntime/index.doctree new file mode 100644 index 0000000..982b2f3 Binary files /dev/null and b/.doctrees/sources/onnxruntime/index.doctree differ diff --git a/.doctrees/sources/onnxruntime/install.doctree b/.doctrees/sources/onnxruntime/install.doctree new file mode 100644 index 0000000..e0d4b9b Binary files /dev/null and b/.doctrees/sources/onnxruntime/install.doctree differ diff --git a/.doctrees/sources/onnxruntime/quick_start.doctree 
b/.doctrees/sources/onnxruntime/quick_start.doctree new file mode 100644 index 0000000..08f463f Binary files /dev/null and b/.doctrees/sources/onnxruntime/quick_start.doctree differ diff --git a/.doctrees/sources/open_clip/index.doctree b/.doctrees/sources/open_clip/index.doctree new file mode 100644 index 0000000..ee59fdb Binary files /dev/null and b/.doctrees/sources/open_clip/index.doctree differ diff --git a/.doctrees/sources/open_clip/install.doctree b/.doctrees/sources/open_clip/install.doctree new file mode 100644 index 0000000..a4caa6a Binary files /dev/null and b/.doctrees/sources/open_clip/install.doctree differ diff --git a/.doctrees/sources/open_clip/quick_start.doctree b/.doctrees/sources/open_clip/quick_start.doctree new file mode 100644 index 0000000..81c906b Binary files /dev/null and b/.doctrees/sources/open_clip/quick_start.doctree differ diff --git a/.doctrees/sources/opencompass/index.doctree b/.doctrees/sources/opencompass/index.doctree new file mode 100644 index 0000000..dbc7a8d Binary files /dev/null and b/.doctrees/sources/opencompass/index.doctree differ diff --git a/.doctrees/sources/opencompass/install.doctree b/.doctrees/sources/opencompass/install.doctree new file mode 100644 index 0000000..74fadbf Binary files /dev/null and b/.doctrees/sources/opencompass/install.doctree differ diff --git a/.doctrees/sources/opencompass/quick_start.doctree b/.doctrees/sources/opencompass/quick_start.doctree new file mode 100644 index 0000000..4c2ca31 Binary files /dev/null and b/.doctrees/sources/opencompass/quick_start.doctree differ diff --git a/.doctrees/sources/opencv/index.doctree b/.doctrees/sources/opencv/index.doctree new file mode 100644 index 0000000..9363d1a Binary files /dev/null and b/.doctrees/sources/opencv/index.doctree differ diff --git a/.doctrees/sources/opencv/install.doctree b/.doctrees/sources/opencv/install.doctree new file mode 100644 index 0000000..ab51a34 Binary files /dev/null and b/.doctrees/sources/opencv/install.doctree differ diff --git a/.doctrees/sources/opencv/quick_start.doctree b/.doctrees/sources/opencv/quick_start.doctree new file mode 100644 index 0000000..bb1e62b Binary files /dev/null and b/.doctrees/sources/opencv/quick_start.doctree differ diff --git a/.doctrees/sources/pytorch/api_doc.doctree b/.doctrees/sources/pytorch/api_doc.doctree new file mode 100644 index 0000000..5a17126 Binary files /dev/null and b/.doctrees/sources/pytorch/api_doc.doctree differ diff --git a/.doctrees/sources/pytorch/examples.doctree b/.doctrees/sources/pytorch/examples.doctree new file mode 100644 index 0000000..305315d Binary files /dev/null and b/.doctrees/sources/pytorch/examples.doctree differ diff --git a/.doctrees/sources/pytorch/faq.doctree b/.doctrees/sources/pytorch/faq.doctree new file mode 100644 index 0000000..51c75df Binary files /dev/null and b/.doctrees/sources/pytorch/faq.doctree differ diff --git a/.doctrees/sources/pytorch/index.doctree b/.doctrees/sources/pytorch/index.doctree new file mode 100644 index 0000000..6c3773e Binary files /dev/null and b/.doctrees/sources/pytorch/index.doctree differ diff --git a/.doctrees/sources/pytorch/install.doctree b/.doctrees/sources/pytorch/install.doctree new file mode 100644 index 0000000..0c3d04c Binary files /dev/null and b/.doctrees/sources/pytorch/install.doctree differ diff --git a/.doctrees/sources/pytorch/quick_start.doctree b/.doctrees/sources/pytorch/quick_start.doctree new file mode 100644 index 0000000..7f6e321 Binary files /dev/null and b/.doctrees/sources/pytorch/quick_start.doctree 
differ diff --git a/.doctrees/sources/sd_webui/index.doctree b/.doctrees/sources/sd_webui/index.doctree new file mode 100644 index 0000000..773ae27 Binary files /dev/null and b/.doctrees/sources/sd_webui/index.doctree differ diff --git a/.doctrees/sources/sd_webui/install.doctree b/.doctrees/sources/sd_webui/install.doctree new file mode 100644 index 0000000..e1f5555 Binary files /dev/null and b/.doctrees/sources/sd_webui/install.doctree differ diff --git a/.doctrees/sources/sd_webui/quick_start.doctree b/.doctrees/sources/sd_webui/quick_start.doctree new file mode 100644 index 0000000..4900056 Binary files /dev/null and b/.doctrees/sources/sd_webui/quick_start.doctree differ diff --git a/.doctrees/sources/sentence_transformers/index.doctree b/.doctrees/sources/sentence_transformers/index.doctree new file mode 100644 index 0000000..51f5fc7 Binary files /dev/null and b/.doctrees/sources/sentence_transformers/index.doctree differ diff --git a/.doctrees/sources/sentence_transformers/install.doctree b/.doctrees/sources/sentence_transformers/install.doctree new file mode 100644 index 0000000..cd347ee Binary files /dev/null and b/.doctrees/sources/sentence_transformers/install.doctree differ diff --git a/.doctrees/sources/sentence_transformers/quick_start.doctree b/.doctrees/sources/sentence_transformers/quick_start.doctree new file mode 100644 index 0000000..36120c0 Binary files /dev/null and b/.doctrees/sources/sentence_transformers/quick_start.doctree differ diff --git a/.doctrees/sources/timm/index.doctree b/.doctrees/sources/timm/index.doctree new file mode 100644 index 0000000..91792ca Binary files /dev/null and b/.doctrees/sources/timm/index.doctree differ diff --git a/.doctrees/sources/timm/install.doctree b/.doctrees/sources/timm/install.doctree new file mode 100644 index 0000000..fd1effc Binary files /dev/null and b/.doctrees/sources/timm/install.doctree differ diff --git a/.doctrees/sources/timm/quick_start.doctree b/.doctrees/sources/timm/quick_start.doctree new file mode 100644 index 0000000..fe496e8 Binary files /dev/null and b/.doctrees/sources/timm/quick_start.doctree differ diff --git a/.doctrees/sources/transformers/fine-tune.doctree b/.doctrees/sources/transformers/fine-tune.doctree new file mode 100644 index 0000000..5d8fe5e Binary files /dev/null and b/.doctrees/sources/transformers/fine-tune.doctree differ diff --git a/.doctrees/sources/transformers/index.doctree b/.doctrees/sources/transformers/index.doctree new file mode 100644 index 0000000..d29a60c Binary files /dev/null and b/.doctrees/sources/transformers/index.doctree differ diff --git a/.doctrees/sources/transformers/inference.doctree b/.doctrees/sources/transformers/inference.doctree new file mode 100644 index 0000000..b5d6f99 Binary files /dev/null and b/.doctrees/sources/transformers/inference.doctree differ diff --git a/.doctrees/sources/transformers/install.doctree b/.doctrees/sources/transformers/install.doctree new file mode 100644 index 0000000..b1492ff Binary files /dev/null and b/.doctrees/sources/transformers/install.doctree differ diff --git a/.doctrees/sources/transformers/modeldownload.doctree b/.doctrees/sources/transformers/modeldownload.doctree new file mode 100644 index 0000000..ec4b16f Binary files /dev/null and b/.doctrees/sources/transformers/modeldownload.doctree differ diff --git a/.doctrees/sources/transformers/quick_start.doctree b/.doctrees/sources/transformers/quick_start.doctree new file mode 100644 index 0000000..af105a9 Binary files /dev/null and 
b/.doctrees/sources/transformers/quick_start.doctree differ diff --git a/.doctrees/sources/trl/index.doctree b/.doctrees/sources/trl/index.doctree new file mode 100644 index 0000000..817ad1d Binary files /dev/null and b/.doctrees/sources/trl/index.doctree differ diff --git a/.doctrees/sources/trl/install.doctree b/.doctrees/sources/trl/install.doctree new file mode 100644 index 0000000..4ad90ea Binary files /dev/null and b/.doctrees/sources/trl/install.doctree differ diff --git a/.doctrees/sources/trl/quick_start.doctree b/.doctrees/sources/trl/quick_start.doctree new file mode 100644 index 0000000..97273e1 Binary files /dev/null and b/.doctrees/sources/trl/quick_start.doctree differ diff --git a/.doctrees/sources/wenet/index.doctree b/.doctrees/sources/wenet/index.doctree new file mode 100644 index 0000000..867fb64 Binary files /dev/null and b/.doctrees/sources/wenet/index.doctree differ diff --git a/.doctrees/sources/wenet/install.doctree b/.doctrees/sources/wenet/install.doctree new file mode 100644 index 0000000..8875e6a Binary files /dev/null and b/.doctrees/sources/wenet/install.doctree differ diff --git a/.doctrees/sources/wenet/quick_start.doctree b/.doctrees/sources/wenet/quick_start.doctree new file mode 100644 index 0000000..05dccaf Binary files /dev/null and b/.doctrees/sources/wenet/quick_start.doctree differ diff --git a/.doctrees/sources/whisper_cpp/index.doctree b/.doctrees/sources/whisper_cpp/index.doctree new file mode 100644 index 0000000..c8debbb Binary files /dev/null and b/.doctrees/sources/whisper_cpp/index.doctree differ diff --git a/.doctrees/sources/whisper_cpp/install.doctree b/.doctrees/sources/whisper_cpp/install.doctree new file mode 100644 index 0000000..46863e6 Binary files /dev/null and b/.doctrees/sources/whisper_cpp/install.doctree differ diff --git a/.doctrees/sources/whisper_cpp/quick_start.doctree b/.doctrees/sources/whisper_cpp/quick_start.doctree new file mode 100644 index 0000000..e68b175 Binary files /dev/null and b/.doctrees/sources/whisper_cpp/quick_start.doctree differ diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/_images/CLIP.png b/_images/CLIP.png new file mode 100644 index 0000000..2787485 Binary files /dev/null and b/_images/CLIP.png differ diff --git a/_images/cat.png b/_images/cat.png new file mode 100644 index 0000000..9104243 Binary files /dev/null and b/_images/cat.png differ diff --git a/_images/catwearhat.png b/_images/catwearhat.png new file mode 100644 index 0000000..dde1347 Binary files /dev/null and b/_images/catwearhat.png differ diff --git a/_images/chat-llamafactory.gif b/_images/chat-llamafactory.gif new file mode 100644 index 0000000..2b93ac6 Binary files /dev/null and b/_images/chat-llamafactory.gif differ diff --git a/_images/downloadmodel.png b/_images/downloadmodel.png new file mode 100644 index 0000000..e2e119f Binary files /dev/null and b/_images/downloadmodel.png differ diff --git a/_images/image.png b/_images/image.png new file mode 100644 index 0000000..bf674be Binary files /dev/null and b/_images/image.png differ diff --git a/_images/input.png b/_images/input.png new file mode 100644 index 0000000..c337954 Binary files /dev/null and b/_images/input.png differ diff --git a/_images/mainparameters.png b/_images/mainparameters.png new file mode 100644 index 0000000..23a504a Binary files /dev/null and b/_images/mainparameters.png differ diff --git a/_images/moreparameters.png b/_images/moreparameters.png new file mode 100644 index 0000000..86fcf4e Binary files /dev/null and 
b/_images/moreparameters.png differ diff --git a/_images/opencv_cannop.png b/_images/opencv_cannop.png new file mode 100644 index 0000000..982c57d Binary files /dev/null and b/_images/opencv_cannop.png differ diff --git a/_images/pytorch_wechat.jpg b/_images/pytorch_wechat.jpg new file mode 100644 index 0000000..ba0dae8 Binary files /dev/null and b/_images/pytorch_wechat.jpg differ diff --git a/_images/result.png b/_images/result.png new file mode 100644 index 0000000..0411455 Binary files /dev/null and b/_images/result.png differ diff --git a/_images/sft-chat.gif b/_images/sft-chat.gif new file mode 100644 index 0000000..d6a2a7d Binary files /dev/null and b/_images/sft-chat.gif differ diff --git a/_images/webchat.png b/_images/webchat.png new file mode 100644 index 0000000..182f224 Binary files /dev/null and b/_images/webchat.png differ diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt new file mode 100644 index 0000000..09f80b3 --- /dev/null +++ b/_sources/index.rst.txt @@ -0,0 +1,363 @@ +昇腾开源 +============ + +.. ----------------------------------------- +.. Page TOC +.. ----------------------------------------- +.. toctree:: + :maxdepth: 2 + :hidden: + :includehidden: + :caption: 开始使用 + + sources/ascend/quick_install.rst + +.. toctree:: + :maxdepth: 2 + :hidden: + :includehidden: + :caption: 原生支持的AI项目 + + sources/pytorch/index.rst + sources/llamafactory/index.rst + sources/accelerate/index.rst + sources/transformers/index.rst + sources/deepspeed/index.rst + sources/onnxruntime/index.rst + sources/open_clip/index.rst + sources/timm/index.rst + sources/Diffusers/index.rst + sources/opencv/index.rst + sources/sd_webui/index.rst + sources/lm_evaluation/index.rst + sources/wenet/index.rst + sources/whisper_cpp/index.rst + sources/llama_cpp/index.rst + sources/sentence_transformers/index.rst + sources/trl/index.rst + sources/opencompass/index.rst + sources/lm_deploy/index.rst + +.. warning:: + + 文档仍在开发中,内容可能存在错误,内容可能会随时更新,请勿将其用于生产环境。 + +选择您的偏好,并按照 :doc:`快速安装昇腾环境` 的安装指导进行操作。 + +安装成功后,请参考各项目的快速开始和样例来开始使用昇腾AI处理器。 + +.. raw:: html + +
+   LLaMA-Factory：便捷高效的大模型微调工具，v0.7.1 版本起支持昇腾。
+   PyTorch：PyTorch AI 框架，2.1 版本起官方支持昇腾。
+   ONNX Runtime：跨平台、高性能的 ML 推理和训练加速器，v1.13.1 版本起原生支持昇腾。
+   DeepSpeed：深度学习优化库，使分布式训练和推理变得简单、高效，v0.10.1 版本起支持昇腾。
+   OpenCV：开源计算机视觉库。
+   Stable Diffusion web UI：Stable Diffusion 可视化工具链。
+   Transformers：适用于 PyTorch、TensorFlow 和 JAX 的先进机器学习库，v4.32.0 版本起支持昇腾。
+   Diffusers：图像和音频生成等扩散模型工具链。
+   Accelerate：适用于 PyTorch 的多 GPU 训练工具链。
+   WeNet：端到端的语音识别工具包。
+   LM-Evaluation-Harness：语言模型评估工具。
+   Whisper.cpp：Whisper 模型高性能推理语音识别框架。
+   llama.cpp：由 C/C++ 实现的 Meta LLaMA 架构推理框架。
+   Sentence Transformers：适用于文本和图像的高性能 Embedding 库。
+   Transformer Reinforcement Learning (TRL)：适用于 SFT、PPO、DPO 等方法的模型后训练库。
+   OpenCompass：大模型标准测试工具。
+   LMDeploy：用于压缩、部署和服务 LLM 的工具包。
diff --git a/_sources/sources/Diffusers/index.rst.txt b/_sources/sources/Diffusers/index.rst.txt new file mode 100644 index 0000000..43bfb55 --- /dev/null +++ b/_sources/sources/Diffusers/index.rst.txt @@ -0,0 +1,8 @@ +Diffusers +=========== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/_sources/sources/Diffusers/install.rst.txt b/_sources/sources/Diffusers/install.rst.txt new file mode 100644 index 0000000..2e57a65 --- /dev/null +++ b/_sources/sources/Diffusers/install.rst.txt @@ -0,0 +1,52 @@ +安装指南 +============== + +本教程面向使用 Diffusers & 昇腾开发者,帮助完成昇腾环境下 Diffusers 的安装。 + +昇腾环境安装 +------------ + +请根据已有昇腾产品型号及CPU架构等按照 :doc:`快速安装昇腾环境指引 <../ascend/quick_install>` 进行昇腾环境安装,或直接获取对应产品的昇腾环境镜像 `ascendai/cann `_ 。 + +.. warning:: + CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。 + +Diffusers 安装 +------------------ + +Python 环境创建 +------------------ + +.. code-block:: shell + :linenos: + + # 创建名为 diffusers 的 python 3.10 的虚拟环境 + conda create -y -n diffusers python=3.10 + # 激活虚拟环境 + conda activate diffusers + + +pip 安装 +------------------ + +通过以下指令安装 Diffusers 及 torch-npu: + +.. code-block:: shell + :linenos: + + pip install diffusers torch==2.2.0 torch-npu==2.2.0 torchvision -i https://pypi.tuna.tsinghua.edu.cn/simple + + +安装校验 +------------------ + +执行以下代码,若无任何报错,仅打印模型下载过程,即说明安装成功: + +.. code-block:: python + :linenos: + + from diffusers import DiffusionPipeline + import torch + + pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16) + pipeline.to("npu") diff --git a/_sources/sources/Diffusers/quick_start.rst.txt b/_sources/sources/Diffusers/quick_start.rst.txt new file mode 100644 index 0000000..1934809 --- /dev/null +++ b/_sources/sources/Diffusers/quick_start.rst.txt @@ -0,0 +1,102 @@ +快速开始 +================== + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 Diffusers ! + +本示例以文生图 Diffusers 库中文生图任务为样例,展示如何进行文生图模型 stable-diffusion-xl-base-1.0 的基于 LoRA 的微调及动态合并 LoRA 的推理。 + +文生图 +------------- + +.. _download: + +模型及数据集下载 +~~~~~~~~~~~~~~~~~~~~ + +1. 请提前下载 `stabilityai/stable-diffusion-xl-base-1.0 `_ 模型至自定义路径 + +2. 请提前下载 `madebyollin/sdxl-vae-fp16-fix `_ 模型至自定义路径 + +3. 请提前下载 `reach-vb/pokemon-blip-captions `_ 数据集至自定义路径 + + +.. _finetune: + +基于 LoRA 的微调 +~~~~~~~~~~~~~~~~~~~~ + +进入 Diffusers 项目目录,新建并执行以下脚本: + +.. note:: + + 请根据 :ref:`download` 中模型及数据集的实际缓存路径指定 stable-diffusion-xl-base-1.0 模型缓存路径 ``MODEL_NAME``,sdxl-vae-fp16-fix 模型缓存路径 ``VAE_NAME`` 和。 + +.. code-block:: shell + :linenos: + :emphasize-lines: 1,2,3 + + export MODEL_NAME="./models_ckpt/stable-diffusion-xl-base-1.0/" + export VAE_NAME="./ckpt/sdxl-vae-fp16-fix" + export TRAIN_DIR="~/diffusers/data/pokemon-blip-captions/pokemon" + + python3 ./examples/text_to_image/train_text_to_image_lora_sdxl.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --pretrained_vae_model_name_or_path=$VAE_NAME \ + --dataset_name=$DATASET_NAME --caption_column="text" \ + --resolution=1024 \ + --random_flip \ + --train_batch_size=1 \ + --num_train_epochs=2 \ + --checkpointing_steps=500 \ + --learning_rate=1e-04 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --mixed_precision="no" \ + --seed=42 \ + --output_dir="sd-pokemon-model-lora-sdxl" \ + --validation_prompt="cute dragon creature" + +微调过程无报错,并且终端显示 ``Steps: 100%`` 的进度条说明微调成功。 + + +动态合并 LoRA 的推理 +~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + 请根据 :ref:`download` 中模型实际缓存路径指定 ``model_path`` + + 根据 :ref:`finetune` 中指定的 LoRA 模型路径 ``output_dir`` 指定 ``lora_model_path`` + + [可选] 修改 ``prompt`` 可使得生成图像改变 + +.. 
code-block:: python + :linenos: + :emphasize-lines: 9 + + from diffusers import DiffusionPipeline + import torch + + lora_model_path = "path/to/sd-pokemon-model-lora-sdxl/checkpoint-800/" + model_path = "./models_ckpt/stable-diffusion-xl-base-1.0/" + pipe = DiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16) + + # 将模型放到 NPU 上 + pipe.to("npu") + + # 加载 LoRA 权重 + pipe.load_lora_weights(lora_model_path) + # 输入 prompt + prompt = "Sylveon Pokemon with elegant features, magical design, \ + light purple aura, extremely detailed and intricate markings, \ + photo realistic, unreal engine, octane render" + # 推理 + image = pipe(prompt, num_inference_steps=30, guidance_scale=7.5).images[0] + + image.save("pokemon-finetuned-inference-generation.png") + + +微调过程无报错,并且终端显示 ``Loading pipeline components...: 100%`` 的进度条说明微调成功。 +查看当前目录下保存的 ``pokemon-finetuned-inference-generation.png`` 图像,可根据 ``prompt`` 生成内容相关的图像说明推理成功。 + diff --git a/_sources/sources/accelerate/index.rst.txt b/_sources/sources/accelerate/index.rst.txt new file mode 100644 index 0000000..a7d82a9 --- /dev/null +++ b/_sources/sources/accelerate/index.rst.txt @@ -0,0 +1,8 @@ +Accelerate +============== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst \ No newline at end of file diff --git a/_sources/sources/accelerate/install.rst.txt b/_sources/sources/accelerate/install.rst.txt new file mode 100644 index 0000000..8490be2 --- /dev/null +++ b/_sources/sources/accelerate/install.rst.txt @@ -0,0 +1,28 @@ +安装指南 +============== + +本教程面向使用 Accelerate & 昇腾的开发者,帮助完成昇腾环境下 Accelerate 的安装。 + +Accelerate 下载安装 +-------------------- + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境! + 或者直接使用具备昇腾环境的镜像 `ascendai/cann:8.0.rc1-910b-ubuntu22.04 `_, + 更多的版本可至 `ascendai/cann `_ 获取。 + +启动镜像 +::::::::::::::::: + +.. code-block:: shell + + docker run -itd --network host -v /usr/local/dcmi:/usr/local/dcmi -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi -v /usr/local/Ascend/driver:/usr/local/Ascend/driver -v /etc/ascend_install.info:/etc/ascend_install.info --device /dev/davinci7 --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc --shm-size 16G --name accelerate ascendai/cann:8.0.rc1-910b-ubuntu22.04 bash + +安装 Accelerate 及依赖包 +:::::::::::::::::::::::::: + +.. code-block:: shell + + pip install torch==2.2.0 torch_npu==2.2.0 accelerate -i https://pypi.tuna.tsinghua.edu.cn/simple + diff --git a/_sources/sources/accelerate/quick_start.rst.txt b/_sources/sources/accelerate/quick_start.rst.txt new file mode 100644 index 0000000..8ec41c0 --- /dev/null +++ b/_sources/sources/accelerate/quick_start.rst.txt @@ -0,0 +1,69 @@ +快速开始 +============ + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及 Accelerate ! + +本教程以一个简单的 NLP 模型为例,讲述如何使用 Accelerate 在昇腾 NPU 上进行模型的训练。 + +前置准备 +------------ + +本篇将使用到 HuggingFace 其他工具链及 scikit-learn 库,请使用以下指令安装: + +.. code-block:: + + pip install datasets evaluate transformers scikit-learn -i https://pypi.tuna.tsinghua.edu.cn/simple + +本篇样例代码为 Accelrate 官方样例,需提前进行下载 + +.. code-block:: + + git clone https://github.com/huggingface/accelerate.git + +模型训练 +------------ + +.. 
code-block:: + :linenos: + + # 替换HF域名,方便国内用户进行数据及模型的下载 + export HF_ENDPOINT=https://hf-mirror.com + # 进入项目目录 + cd accelerate/examples + # 模型训练 + python nlp_example.py + +出现如下日志代表训练成功: + +:: + + Downloading builder script: 5.75kB [00:01, 3.69kB/s] + tokenizer_config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████| 49.0/49.0 [00:00<00:00, 237kB/s] + config.json: 570B [00:00, 2.23MB/s] + vocab.txt: 79.5kB [00:12, 3.45kB/s]Error while downloading from https://hf-mirror.com/bert-base-cased/resolve/main/vocab.txt: HTTPSConnectionPool(host='hf-mirror.com', port=443): Read timed out. + Trying to resume download... + vocab.txt: 213kB [00:07, 15.5kB/s]] + vocab.txt: 91.4kB [00:32, 2.81kB/s] + tokenizer.json: 436kB [00:19, 22.8kB/s] + Downloading readme: 35.3kB [00:01, 26.4kB/s] + Downloading data: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 649k/649k [00:02<00:00, 288kB/s] + Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████| 75.7k/75.7k [00:00<00:00, 77.8kB/s] + Downloading data: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 308k/308k [00:01<00:00, 204kB/s] + Generating train split: 100%|███████████████████████████████████████████████████████████████████████████| 3668/3668 [00:00<00:00, 27701.23 examples/s] + Generating validation split: 100%|████████████████████████████████████████████████████████████████████████| 408/408 [00:00<00:00, 73426.42 examples/s] + Generating test split: 100%|███████████████████████████████████████████████████████████████████████████| 1725/1725 [00:00<00:00, 246370.91 examples/s] + Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3668/3668 [00:01<00:00, 3378.05 examples/s] + Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 408/408 [00:00<00:00, 3553.72 examples/s] + Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 1725/1725 [00:00<00:00, 5109.03 examples/s] + model.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 436M/436M [02:42<00:00, 2.68MB/s] + Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight'] + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. 
+ epoch 0: {'accuracy': 0.8014705882352942, 'f1': 0.8439306358381503} + epoch 1: {'accuracy': 0.8578431372549019, 'f1': 0.8975265017667845} + epoch 2: {'accuracy': 0.8700980392156863, 'f1': 0.9087779690189329} diff --git a/_sources/sources/ascend/quick_install.rst.txt b/_sources/sources/ascend/quick_install.rst.txt new file mode 100644 index 0000000..5d0c78c --- /dev/null +++ b/_sources/sources/ascend/quick_install.rst.txt @@ -0,0 +1,232 @@ +快速安装昇腾环境 +================ + +跟随指导,在您的机器上快速安装昇腾环境。 + +1. 系统要求 +---------------- +1.1 前置检查 +^^^^^^^^^^^^^ +确认昇腾AI处理器已经安装妥当 + +.. code-block:: bash + + lspci | grep 'Processing accelerators' + +确认操作系统架构及版本 + +.. code-block:: bash + + uname -m && cat /etc/*release + +确认Python版本 + +.. code-block:: bash + + python --version + + +1.2 软件要求 +^^^^^^^^^^^^^ +======== ======================================== +软件 版本 +======== ======================================== +操作系统 openEuler20.03/22.03, Ubuntu 20.04/22.04 +Python 3.8, 3.9, 3.10 +======== ======================================== + + +2. 环境安装 +------------------ +根据您的需求,选择合适的软件包版本: + +.. warning:: + + 以下文档需要使用非root用户进行安装安装 + +.. raw:: html + + +
+   请根据实际环境选择以下配置，以确定合适的软件包版本：
+
+   - 安装方式：直接安装 / Docker
+   - 操作系统：openEuler / Ubuntu
+   - 操作系统版本：openEuler 20.03/22.03 或 Ubuntu 20.04/22.04（依所选操作系统而定）
+   - CPU架构：x86-64 / aarch64
+   - NPU型号：Atlas 800T A2 训练卡 / Atlas 300I Pro 推理卡
+   - 昇腾套件版本：Driver / Firmware
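+以 CANN-toolkit 及 Kernel 算子包为例，下面给出一个非 root 用户安装的参考流程（示例）。其中的安装包文件名均为占位符，实际的文件名、版本以及驱动和固件的安装步骤，请以按上述配置获取的官方安装包及其说明为准：
+
+.. code-block:: bash
+
+   # 安装 CANN-toolkit（非 root 用户安装，默认安装至 ~/Ascend）
+   chmod +x Ascend-cann-toolkit_<version>_linux-<arch>.run
+   ./Ascend-cann-toolkit_<version>_linux-<arch>.run --install
+
+   # 安装配套的 Kernel 算子包
+   chmod +x Ascend-cann-kernels-<soc>_<version>_linux.run
+   ./Ascend-cann-kernels-<soc>_<version>_linux.run --install
+
+   # 使能 CANN 环境变量（可写入 ~/.bashrc 以便长期生效）
+   source ~/Ascend/ascend-toolkit/set_env.sh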
+ + +3. 卸载 +---------- +**卸载驱动** + +.. code-block:: bash + + sudo /usr/local/Ascend/firmware/script/uninstall.sh + +**卸载固件** + +.. code-block:: bash + + sudo /usr/local/Ascend/driver/script/uninstall.sh + +**卸载CANN-toolkit** + +.. code-block:: bash + + ~/Ascend/ascend-toolkit//{arch}-linux/script/uninstall.sh \ No newline at end of file diff --git a/_sources/sources/deepspeed/index.rst.txt b/_sources/sources/deepspeed/index.rst.txt new file mode 100644 index 0000000..29bff2a --- /dev/null +++ b/_sources/sources/deepspeed/index.rst.txt @@ -0,0 +1,8 @@ +DeepSpeed +=========== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/_sources/sources/deepspeed/install.rst.txt b/_sources/sources/deepspeed/install.rst.txt new file mode 100644 index 0000000..54ed9b0 --- /dev/null +++ b/_sources/sources/deepspeed/install.rst.txt @@ -0,0 +1,73 @@ +安装指南 +============== + +.. note:: + 在本示例之前,请确保已经安装了 `昇腾环境 <../ascend/quick_install.html>`_ 和 `PyTorch <../pytorch/install.html>`_ 环境。 + +1. 安装DeepSpeed +----------------- +安装DeepSpeed最简单的方式是通过 ``pip`` 。 + +.. code-block:: shell + :linenos: + + pip install deepspeed + + +2. 通过源码安装 +------------------ +从 `GitHub `_ 克隆DeepSpeed项目后,可以通过 ``pip`` 来通过源码编译。 + +.. code-block:: shell + :linenos: + + pip install . + + +3. 预编译DeepSpeed算子(可选) +---------------------------------- +如果不想使用JIT编译模式,而想要预编译DeepSpeed算子,可以通过设置环境变量的方式完成算子的预编译。 + +.. code-block:: shell + :linenos: + + DS_BUILD_OPS=1 pip install deepspeed + +4. 安装验证 +----------- + +安装完成后,可以通过 ``ds_report`` 命令查看安装结果 + +.. code-block:: shell + :linenos: + + -------------------------------------------------- + DeepSpeed C++/CUDA extension op report + -------------------------------------------------- + NOTE: Ops not installed will be just-in-time (JIT) compiled at + runtime if needed. Op compatibility means that your system + meet the required dependencies to JIT install the op. + -------------------------------------------------- + JIT compiled ops requires ninja + ninja .................. [OKAY] + -------------------------------------------------- + op name ................ installed .. compatible + -------------------------------------------------- + deepspeed_not_implemented [NO] ....... [OKAY] + async_io ............... [NO] ....... [OKAY] + cpu_adagrad ............ [NO] ....... [OKAY] + cpu_adam ............... [NO] ....... [OKAY] + cpu_lion ............... [NO] ....... [OKAY] + fused_adam ............. [NO] ....... [OKAY] + transformer_inference .. [NO] ....... [OKAY] + -------------------------------------------------- + DeepSpeed general environment info: + torch install path ............... ['/root/miniconda3/envs/ds/lib/python3.10/site-packages/torch'] + torch version .................... 2.2.0 + deepspeed install path ........... ['/root/miniconda3/envs/ds/lib/python3.10/site-packages/deepspeed'] + deepspeed info ................... 0.14.4, unknown, unknown + deepspeed wheel compiled w. ...... torch 2.2 + torch_npu install path ........... ['/root/miniconda3/envs/ds/lib/python3.10/site-packages/torch_npu'] + torch_npu version ................ 2.2.0 + ascend_cann version .............. 8.0.RC2.alpha002 + shared memory (/dev/shm) size .... 20.00 GB diff --git a/_sources/sources/deepspeed/quick_start.rst.txt b/_sources/sources/deepspeed/quick_start.rst.txt new file mode 100644 index 0000000..00baae3 --- /dev/null +++ b/_sources/sources/deepspeed/quick_start.rst.txt @@ -0,0 +1,34 @@ +快速开始 +========== + +.. 
note:: + 在本示例之前,请确保已经安装了 `DeepSpeed <./install.html>`_ 环境。 如果还未安装,可以执行 ``pip install deepspeed`` 完成安装。 + + +1. 使用DeepSpeed多卡并行训练 +------------------------------- +以下代码使用了cifar10数据集,使用DeepSpeed训练模型在多张NPU卡上进行模型训练(来自 `DeepSpeed Examples `_),自DeepSpeed v0.12.6之后,代码无需任何修改,即可自动检测NPU并进行训练。 + +.. rli:: https://raw.githubusercontent.com/microsoft/DeepSpeedExamples/master/training/cifar/cifar10_deepspeed.py + :language: python + :linenos: + +2. 训练结果查看 +---------------- +训练完成后,会打印模型对图像识别的结果。 + +.. code-block:: shell + :linenos: + + Finished Training + Accuracy of the network on the 10000 test images: 57 % + Accuracy of plane : 65 % + Accuracy of car : 67 % + Accuracy of bird : 52 % + Accuracy of cat : 34 % + Accuracy of deer : 52 % + Accuracy of dog : 49 % + Accuracy of frog : 59 % + Accuracy of horse : 66 % + Accuracy of ship : 66 % + Accuracy of truck : 56 % diff --git a/_sources/sources/llama_cpp/index.rst.txt b/_sources/sources/llama_cpp/index.rst.txt new file mode 100644 index 0000000..3945d2e --- /dev/null +++ b/_sources/sources/llama_cpp/index.rst.txt @@ -0,0 +1,10 @@ +llama.cpp +=========== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst + + diff --git a/_sources/sources/llama_cpp/install.rst.txt b/_sources/sources/llama_cpp/install.rst.txt new file mode 100644 index 0000000..bdd3f1c --- /dev/null +++ b/_sources/sources/llama_cpp/install.rst.txt @@ -0,0 +1,119 @@ +安装指南 +============== + +本教程面向使用 llama.cpp & 昇腾的开发者,帮助完成昇腾环境下 llama.cpp 的安装。 + +.. note:: + 目前 llama.cpp 仅支持 Atlas 300T A2 型号设备 + +llama.cpp 下载安装 +--------------------------- + +此处提供源码安装和 docker 两种安装方式,请按需选择: + +.. raw:: html + + +
+   安装方式：源码安装 / Docker（两种方式的具体步骤见下文，请按需选择其一）
+ +--------------- + +.. raw:: html + +
+   使用源代码安装
+
+   备注：请确保已经根据快速安装昇腾环境指引安装了对应的 CANN-toolkit 版本以及相应的固件和驱动，并应用了 CANN-toolkit 环境变量。
+
+   提示：llama.cpp 支持的 CANN 最低版本为 8.0.rc1。安装 CANN 时，请同时安装 Kernel 算子包。
+
+   获取源代码，使用以下 git 指令获取源码：
+
+       git clone https://github.com/ggerganov/llama.cpp
+       cd llama.cpp
+
+   构建 llama.cpp：
+
+       cmake -B build -DGGML_CANN=on -DCMAKE_BUILD_TYPE=release
+       cmake --build build --config release
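+   上述构建依赖 CANN-toolkit 的环境变量。若尚未使能，可参考以下示例（假设 CANN-toolkit 以非 root 用户安装在默认路径 ~/Ascend 下，实际路径请以安装位置为准）：
+
+       # 使能 CANN-toolkit 环境变量
+       source ~/Ascend/ascend-toolkit/set_env.sh
+       # 确认驱动与 NPU 设备状态正常
+       npu-smi info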
+   使用 Docker
+
+   备注：请确保已经根据快速安装昇腾环境指引安装了对应的固件和驱动。
+
+   提示：更多 CANN 的基础镜像选择见 ascendai/cann。
+
+   构建 docker 镜像：
+
+       git clone https://github.com/ggerganov/llama.cpp
+       cd llama.cpp
+       docker build -t llama-cpp-cann -f .devops/llama-cli-cann.Dockerfile .
+
+   找到所有卡的运行信息：
+
+       npu-smi info
+
+   启动 docker 容器：
+
+       docker run --name llamacpp \
+           --device /dev/davinci0 \
+           --device /dev/davinci_manager \
+           --device /dev/devmm_svm \
+           --device /dev/hisi_hdc \
+           -v /usr/local/dcmi:/usr/local/dcmi \
+           -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
+           -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
+           -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
+           -v /PATH_TO_YOUR_MODELS/:/app/models \
+           -it llama-cpp-cann -m /app/models/MODEL_PATH -ngl 32 \
+           -p "Building a website can be done in 10 simple steps:"
+ +安装校验 +----------------- + +安装完成后,无任何报错信息,即为安装成功,下面为部分回显信息: + +.. code-block:: shell + :linenos: + + [ 97%] Built target test-grammar-integration + [ 97%] Built target llama-speculative + [ 97%] Built target llama-perplexity + [ 98%] Linking CXX executable ../../bin/llama-bench + [ 98%] Linking CXX executable ../bin/test-json-schema-to-grammar + [ 98%] Built target llama-bench + [ 98%] Built target test-json-schema-to-grammar + [100%] Linking CXX executable ../../bin/llama-server + [100%] Built target llama-server + diff --git a/_sources/sources/llama_cpp/quick_start.rst.txt b/_sources/sources/llama_cpp/quick_start.rst.txt new file mode 100644 index 0000000..dd52aff --- /dev/null +++ b/_sources/sources/llama_cpp/quick_start.rst.txt @@ -0,0 +1,179 @@ +快速开始 +============ + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及 llama.cpp ! + +本教程聚焦大语言模型(Large Language Model,LLM)的推理过程,以 Qwen2.5-7B 模型为例,讲述如何使用 llama.cpp 在昇腾 NPU 上进行推理。 + + +模型文件准备及量化 +--------------- + +llama.cpp 的推理需要使用 gguf 格式文件,llama.cpp 提供了两种方式转换 Hugging Face 模型文件: + +- 使用 `GGUF-my-repo `_ 将模型进行转换。 + +- 使用项目中的 `convert_hf_to_gguf.py` 文件将 Hugging Face 模型转换为 gguf 格式: + + .. code-block:: shell + :linenos: + + python convert_hf_to_gguf.py path/to/model + +详情请参考 `Prepare and Quantize `_ 。 + +注意:目前仅支持 FP16 精度及 Q4_0/Q8_0 量化模型。 + +推理 +------------ + +有两种设备选择模式: + +- 单设备:使用用户指定的一个设备目标。 +- 多设备:自动选择具有相同后端的设备。 + ++---------------+---------------------------------------------+ +| 设备选择 | 参数 | ++===============+=============================================+ +| 单设备 | --split-mode none --main-gpu DEVICE_ID | ++---------------+---------------------------------------------+ +| 多设备 | --split-mode layer (default) | ++---------------+---------------------------------------------+ + +使用单卡推理 +++++++++++++++++ + +.. code-block:: shell + :linenos: + + ./build/bin/llama-cli -m path_to_model -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0 + +使用多卡推理 +++++++++++++++++ + +.. code-block:: shell + :linenos: + + ./build/bin/llama-cli -m path_to_model -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer + +以下为正常推理结果: + +.. code-block:: shell + :linenos: + + Log start + main: build = 3520 (8e707118) + main: built with cc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 for aarch64-linux-gnu + main: seed = 1728907816 + llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from /home/jiahao/models/llama3-8b-instruct-fp16.gguf (version GGUF V3 (latest)) + llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. 
+ llama_model_loader: - kv 0: general.architecture str = llama + llama_model_loader: - kv 1: general.name str = Meta-Llama-3-8B-Instruct + llama_model_loader: - kv 2: llama.block_count u32 = 32 + llama_model_loader: - kv 3: llama.context_length u32 = 8192 + llama_model_loader: - kv 4: llama.embedding_length u32 = 4096 + llama_model_loader: - kv 5: llama.feed_forward_length u32 = 14336 + llama_model_loader: - kv 6: llama.attention.head_count u32 = 32 + llama_model_loader: - kv 7: llama.attention.head_count_kv u32 = 8 + llama_model_loader: - kv 8: llama.rope.freq_base f32 = 500000.000000 + llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010 + llama_model_loader: - kv 10: general.file_type u32 = 1 + llama_model_loader: - kv 11: llama.vocab_size u32 = 128256 + llama_model_loader: - kv 12: llama.rope.dimension_count u32 = 128 + llama_model_loader: - kv 13: tokenizer.ggml.model str = gpt2 + llama_model_loader: - kv 14: tokenizer.ggml.pre str = llama-bpe + llama_model_loader: - kv 15: tokenizer.ggml.tokens arr[str,128256] = ["!", "\"", "#", "$", "%", "&", "'", ... + llama_model_loader: - kv 16: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... + llama_model_loader: - kv 17: tokenizer.ggml.merges arr[str,280147] = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "... + llama_model_loader: - kv 18: tokenizer.ggml.bos_token_id u32 = 128000 + llama_model_loader: - kv 19: tokenizer.ggml.eos_token_id u32 = 128009 + llama_model_loader: - kv 20: tokenizer.chat_template str = {% set loop_messages = messages %}{% ... + llama_model_loader: - kv 21: general.quantization_version u32 = 2 + llama_model_loader: - type f32: 65 tensors + llama_model_loader: - type f16: 226 tensors + llm_load_vocab: special tokens cache size = 256 + llm_load_vocab: token to piece cache size = 0.8000 MB + llm_load_print_meta: format = GGUF V3 (latest) + llm_load_print_meta: arch = llama + llm_load_print_meta: vocab type = BPE + llm_load_print_meta: n_vocab = 128256 + llm_load_print_meta: n_merges = 280147 + llm_load_print_meta: vocab_only = 0 + llm_load_print_meta: n_ctx_train = 8192 + llm_load_print_meta: n_embd = 4096 + llm_load_print_meta: n_layer = 32 + llm_load_print_meta: n_head = 32 + llm_load_print_meta: n_head_kv = 8 + llm_load_print_meta: n_rot = 128 + llm_load_print_meta: n_swa = 0 + llm_load_print_meta: n_embd_head_k = 128 + llm_load_print_meta: n_embd_head_v = 128 + llm_load_print_meta: n_gqa = 4 + llm_load_print_meta: n_embd_k_gqa = 1024 + llm_load_print_meta: n_embd_v_gqa = 1024 + llm_load_print_meta: f_norm_eps = 0.0e+00 + llm_load_print_meta: f_norm_rms_eps = 1.0e-05 + llm_load_print_meta: f_clamp_kqv = 0.0e+00 + llm_load_print_meta: f_max_alibi_bias = 0.0e+00 + llm_load_print_meta: f_logit_scale = 0.0e+00 + llm_load_print_meta: n_ff = 14336 + llm_load_print_meta: n_expert = 0 + llm_load_print_meta: n_expert_used = 0 + llm_load_print_meta: causal attn = 1 + llm_load_print_meta: pooling type = 0 + llm_load_print_meta: rope type = 0 + llm_load_print_meta: rope scaling = linear + llm_load_print_meta: freq_base_train = 500000.0 + llm_load_print_meta: freq_scale_train = 1 + llm_load_print_meta: n_ctx_orig_yarn = 8192 + llm_load_print_meta: rope_finetuned = unknown + llm_load_print_meta: ssm_d_conv = 0 + llm_load_print_meta: ssm_d_inner = 0 + llm_load_print_meta: ssm_d_state = 0 + llm_load_print_meta: ssm_dt_rank = 0 + llm_load_print_meta: model type = 8B + llm_load_print_meta: model ftype = F16 + llm_load_print_meta: model params = 8.03 B + llm_load_print_meta: model size = 14.96 
GiB (16.00 BPW) + llm_load_print_meta: general.name = Meta-Llama-3-8B-Instruct + llm_load_print_meta: BOS token = 128000 '<|begin_of_text|>' + llm_load_print_meta: EOS token = 128009 '<|eot_id|>' + llm_load_print_meta: LF token = 128 'Ä' + llm_load_print_meta: EOT token = 128009 '<|eot_id|>' + llm_load_print_meta: max token length = 256 + llm_load_tensors: ggml ctx size = 0.27 MiB + llm_load_tensors: CPU buffer size = 15317.02 MiB + llm_load_tensors: CANN buffer size = 13313.00 MiB + ......................................................................................... + llama_new_context_with_model: n_ctx = 8192 + llama_new_context_with_model: n_batch = 2048 + llama_new_context_with_model: n_ubatch = 512 + llama_new_context_with_model: flash_attn = 0 + llama_new_context_with_model: freq_base = 500000.0 + llama_new_context_with_model: freq_scale = 1 + llama_kv_cache_init: CANN KV buffer size = 1024.00 MiB + llama_new_context_with_model: KV self size = 1024.00 MiB, K (f16): 512.00 MiB, V (f16): 512.00 MiB + llama_new_context_with_model: CPU output buffer size = 0.49 MiB + llama_new_context_with_model: CANN compute buffer size = 1260.50 MiB + llama_new_context_with_model: CPU compute buffer size = 24.01 MiB + llama_new_context_with_model: graph nodes = 1030 + llama_new_context_with_model: graph splits = 4 + + system_info: n_threads = 192 / 192 | AVX = 0 | AVX_VNNI = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 0 | NEON = 1 | SVE = 0 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 | + sampling: + repeat_last_n = 64, repeat_penalty = 1.000, frequency_penalty = 0.000, presence_penalty = 0.000 + top_k = 40, tfs_z = 1.000, top_p = 0.950, min_p = 0.050, typical_p = 1.000, temp = 0.800 + mirostat = 0, mirostat_lr = 0.100, mirostat_ent = 5.000 + sampling order: + CFG -> Penalties -> top_k -> tfs_z -> typical_p -> top_p -> min_p -> temperature + generate: n_ctx = 8192, n_batch = 2048, n_predict = -1, n_keep = 1 + + + Building a website can be done in 10 simple steps: 1. Define your website's purpose and target audience 2. Choose a domain name and register it with a registrar 3. Select a web hosting service and set up your hosting account 4. Design your website's layout and structure 5. Create content for your website, including text, images, and other media 6. Build a responsive website design that adapts to different devices and screen sizes 7. Choose a Content Management System (CMS) and install it on your website 8. 
Customize your website's design and layout using a CMS + + llama_print_timings: load time = 9074.69 ms + llama_print_timings: sample time = 31.97 ms / 112 runs ( 0.29 ms per token, 3503.28 tokens per second) + llama_print_timings: prompt eval time = 238.53 ms / 13 tokens ( 18.35 ms per token, 54.50 tokens per second) + llama_print_timings: eval time = 13152.29 ms / 111 runs ( 118.49 ms per token, 8.44 tokens per second) + llama_print_timings: total time = 13623.53 ms / 124 tokens \ No newline at end of file diff --git a/_sources/sources/llamafactory/example.rst.txt b/_sources/sources/llamafactory/example.rst.txt new file mode 100644 index 0000000..0eff89b --- /dev/null +++ b/_sources/sources/llamafactory/example.rst.txt @@ -0,0 +1,274 @@ +全流程昇腾实践 +===================== + +开始本篇之前,请阅读 `LLaMA-Factory QuickStart `_ 了解 LLaMA-Factory 及其主要功能的用法, +并参考 :doc:`安装指南 <./install>` 及 :doc:`快速开始 <./quick_start>` 完成基本的环境准备、LLaMA-Factory 安装及简单的微调和推理功能。 +本篇在此基础上,以 Qwen1.5-7B 模型为例,帮助开发者在昇腾 NPU 上使用 LLaMA-Factory 更多实用特性。 + +`LLaMA-Factory QuickStart `_ 中详解了下列 9 种功能,本教程为在 NPU 上全流程实践示例, +有关功能及参数的详细解析请参考 `LLaMA-Factory QuickStart `_ + + +1. 原始模型直接推理 +2. 自定义数据集构建 +3. 基于 LoRA 的 sft 指令微调 +4. 动态合并 LoRA 的推理 +5. 批量预测和训练效果评估 +6. LoRA模型合并导出 +7. 一站式 webui board 的使用 +8. API Server的启动与调用 +9. 大模型主流评测 benchmark + +前置准备 +-------- + +安装准备 +~~~~~~~~~ + +请确认已按照 :doc:`安装指南 <./install>` 安装 CANN 和 LLaMA-Factory 并完成安装校验。 + +配置文件准备 +~~~~~~~~~~~~ + +本示例中用到的参数配置文件与快速开始 :ref:`qwen1_5_lora_sft_ds.yaml ` 中一致,可参考快速开始。 + + +原始模型直接推理 +----------------- + +验证 LLaMA-Factory 在昇腾 NPU 上推理功能是否正常: + +.. code-block:: shell + :linenos: + + ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli webchat --model_name_or_path qwen/Qwen1.5-7B \ + --adapter_name_or_path saves/Qwen1.5-7B/lora/sft \ + --template qwen \ + --finetuning_type lora + +如下图所示可正常进行对话,即为可正常推理: + +.. figure:: ./images/webchat.png + :align: center + +自定义数据集构建 +------------------- + +本篇用到的数据集为 LLaMA-Factory 自带的 identity 和 alpaca_en_demo,对 identity 数据集进行如下全局替换即可实现定制指令: + +- ``{{name}}`` 替换为 ``Ascend-helper`` +- ``{{author}}`` 替换为 ``Ascend`` + +更多自定义数据集的构建请参考 `官方数据集构造指引 `_ 。 + +.. _sft: + +基于 LoRA 的 sft 指令微调 +------------------------- +在 :doc:`快速开始 <./quick_start>` 中,已经尝试过使用 src/train.py 为入口的微调脚本,本篇中均使用 llamafactory-cli 命令启动微调、推理等程序。 + +.. code-block:: shell + :linenos: + + ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train /qwen1_5_lora_sft_ds.yaml + + +动态合并 LoRA 的推理 +------------------------- + +.. code-block:: shell + :linenos: + + ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli chat --model_name_or_path qwen/Qwen1.5-7B \ + --adapter_name_or_path saves/Qwen1.5-7B/lora/sft \ + --template qwen \ + --finetuning_type lora + +通过询问大模型是谁检验 sft 指令微调的成果,如下图,大模型回答自己是 Ascend-helper 说明 sft 成功,如失败,可返回 :ref:`sft` 增加训练轮数重新训练。 + +.. figure:: ./images/sft-chat.gif + :align: center + + +批量预测和训练效果评估 +------------------------ + +使用批量预测和评估前,需先安装 jieba、rouge-chinese、nltk 三个库: + +.. code-block:: shell + :linenos: + + pip install jieba,rouge-chinese,nltk -i https://pypi.tuna.tsinghua.edu.cn/simple + +然后使用以下指令对微调后的模型在 alpaca_gpt4_zh 和 identity 数据集上进行批量预测和效果评估: + +.. 
code-block:: shell + :linenos: + + ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train \ + --stage sft \ + --do_predict \ + --model_name_or_path qwen/Qwen1.5-7B \ + --adapter_name_or_path ./saves/Qwen1.5-7B/lora/sft \ + --dataset alpaca_gpt4_zh,identity \ + --dataset_dir ./data \ + --template qwen \ + --finetuning_type lora \ + --output_dir ./saves/Qwen1.5-7B/lora/predict \ + --overwrite_cache \ + --overwrite_output_dir \ + --cutoff_len 1024 \ + --preprocessing_num_workers 16 \ + --per_device_eval_batch_size 1 \ + --max_samples 20 \ + --predict_with_generate + +完成批量预测与评估后,在指定的输出路径下会看到以下文件: + +.. code-block:: shell + + all_results.json + generated_predictions.jsonl + predict_results.json + trainer_log.jsonl + +``generated_predictions.json`` 中为所有测试样本的输入提示词 prompt、标签 label 和模型输出预测结果 predict,下面是其中一个示例: + +.. code-block:: json + + {"prompt": "system\nYou are a helpful assistant.\nuser\n保持健康的三个提示。\nassistant\n", "label": "以下是保持健康的三个提示:\n\n1. 保持身体活动。每天做适当的身体运动,如散步、跑步或游泳,能促进心血管健康,增强肌肉力量,并有助于减少体重。\n\n2. 均衡饮食。每天食用新鲜的蔬菜、水果、全谷物和脂肪含量低的蛋白质食物,避免高糖、高脂肪和加工食品,以保持健康的饮食习惯。\n\n3. 睡眠充足。睡眠对人体健康至关重要,成年人每天应保证 7-8 小时的睡眠。良好的睡眠有助于减轻压力,促进身体恢复,并提高注意力和记忆力。", "predict": "保持健康的三个提示包括:1. 均衡饮食:饮食应包括各种食物,如蔬菜、水果、全麦面包、蛋白质和健康脂肪,以满足身体的营养需求。\n2. 锻炼:每周至少进行150分钟的中等强度有氧运动,如快走、跑步、游泳或骑自行车,以及至少两次力量训练,以帮助维持身体健康。\n3. 睡眠:保持规律的睡眠习惯,每晚至少睡7-8小时,以帮助身体恢复和充电。"} + + +``predict_results.json`` 中即为训练效果评估所得结果: + +.. code-block:: yaml + + { + "predict_bleu-4": 50.941235, + "predict_rouge-1": 65.7085975, + "predict_rouge-2": 52.576409999999996, + "predict_rouge-l": 60.487535, + "predict_runtime": 196.1634, + "predict_samples_per_second": 0.204, + "predict_steps_per_second": 0.204 + } + + +LoRA 模型合并导出 +------------------ + +LoRA 模型合并和导出时,可通过指定 ``export_device`` 参数为 ``auto`` 来自动检测当前加速卡环境, +启用 NPU 作为导出设备: + +.. 端到端导出 Qwen1.5-7B LoRA 模型比 cpu 快 37.3% 左右。 + +.. code-block:: shell + :linenos: + + ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli export \ + --model_name_or_path qwen/Qwen1.5-7B \ + --adapter_name_or_path ./saves/Qwen1.5-7B/lora/sft \ + --template qwen \ + --finetuning_type lora \ + --export_dir ./saves/Qwen1.5-7B/lora/megred-model-path \ + --export_size 2 \ + --export_device auto \ + --export_legacy_format False + +一站式 webui board 的使用 +---------------------------- + +使用 webui 可零代码实现以上功能,启动命令如下: + +.. code-block:: shell + :linenos: + + ASCEND_RT_VISIBLE_DEVICES=0 GRADIO_SHARE=0 GRADIO_SERVER_PORT=7007 GRADIO_SERVER_NAME="0.0.0.0" llamafactory-cli webui + +在 webui 实现 Qwen1.5-7B 模型的 LoRA 模型微调、动态推理和模型导出的操作示例: + +.. raw:: html + + + + +API Server的启动与调用 +-------------------------- + +``API_PORT`` 为 API 服务的端口号,可替换为自定义端口。通过以下命令启动 API 服务: + +.. code-block:: shell + :linenos: + + ASCEND_RT_VISIBLE_DEVICES=0 API_PORT=7007 llamafactory-cli api \ + --model_name_or_path qwen/Qwen1.5-7B \ + --adapter_name_or_path ./saves/Qwen1.5-7B/lora/sft \ + --template qwen \ + --finetuning_type lora + +终端输出如下关键信息时,即可在下游任务重通过 API 调用 Qwen1.5-7B + +.. code-block:: shell + :linenos: + + Visit http://localhost:7007/docs for API document. + INFO: Started server process [2261535] + INFO: Waiting for application startup. + INFO: Application startup complete. + INFO: Uvicorn running on http://0.0.0.0:7007 (Press CTRL+C to quit) + +使用 API 调用 Qwen1.5-7B 实现问答聊天的示例代码,通过 ``message`` 传入您的问题: + +.. 
code-block:: python + :linenos: + + import os + from openai import OpenAI + from transformers.utils.versions import require_version + + require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0") + + if __name__ == '__main__': + # change to your custom port + port = 7007 + client = OpenAI( + api_key="0", + base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 7007)), + ) + messages = [] + messages.append({"role": "user", "content": "hello, what is Ascend NPU"}) + result = client.chat.completions.create(messages=messages, model="test") + print(result.choices[0].message) + +执行成功后可在终端看到如下输出,Qwen1.5-7B 正确介绍了 Ascend NPU: + +.. code-block:: shell + + ChatCompletionMessage(content='The Ascend NPU, or Neural Processing Unit, is an AI chip developed by Huawei that is designed to accelerate the performance of deep learning and artificial intelligence workloads. It is specifically designed to be energy-efficient, and is intended to be used in a wide range of devices, from smartphones to data centers. The Ascend NPU is designed to support a variety of AI workloads, including object detection, natural language processing, and speech recognition.', role='assistant', function_call=None, tool_calls=None) + +进阶-大模型主流评测 benchmark +-------------------------------- + +通过以下指令启动对 Qwen1.5-7B 模型在 mmlu 数据集的评测: + +.. code-block:: shell + :linenos: + + llamafactory-cli eval \ + --model_name_or_path qwen/Qwen1.5-7B \ + --template fewshot \ + --task mmlu \ + --split validation \ + --lang en \ + --n_shot 5 \ + --batch_size 1 + +评测完成后,终端输出的评测结果如下,与 Qwen1.5-7B 官方报告对齐: + +.. code-block:: shell + + Average: 61.79 + STEM: 54.83 + Social Sciences: 73.00 + Humanities: 55.02 + Other: 67.32 diff --git a/_sources/sources/llamafactory/faq.rst.txt b/_sources/sources/llamafactory/faq.rst.txt new file mode 100644 index 0000000..2b84292 --- /dev/null +++ b/_sources/sources/llamafactory/faq.rst.txt @@ -0,0 +1,120 @@ +FAQ +======= + +设备指定 +-------- + +**Q:为什么我的 NPU 卡没调用起来?** + +1. 通过 ``ASCEND_RT_VISIBLE_DEVICES`` 环境变量指定昇腾 NPU 卡,如 ``ASCEND_RT_VISIBLE_DEVICES=0,1,2,3`` 指定使用 0,1,2,3四张 NPU 卡进行微调/推理。 + +.. hint:: + + 昇腾 NPU 卡从 0 开始编号,docker 容器内也是如此; + 如映射物理机上的 6,7 号 NPU 卡到容器内使用,其对应的卡号分别为 0,1 + +2. 检查是否安装 torch-npu,建议通过 ``pip install -e '.[torch-npu,metrics]'`` 安装 LLaMA-Factory。 + +推理报错 +---------- + +**Q:使用昇腾 NPU 推理报错 RuntimeError: ACL stream synchronize failed, error code:507018** + +A:设置 do_sample: false,取消随机抽样策略 + +关联 issues: + +- https://github.com/hiyouga/LLaMA-Factory/issues/3840 + +微调/训练报错 +-------------- + +**Q:使用 ChatGLM 系列模型微调/训练模型时,报错 NotImplementedError: Unknown device for graph fuser** + +A:在 modelscope 或 huggingface 下载的 repo 里修改 ``modeling_chatglm.py`` 代码,取消 torch.jit 装饰器注释 + +关联 issues: + +- https://github.com/hiyouga/LLaMA-Factory/issues/3788 +- https://github.com/hiyouga/LLaMA-Factory/issues/4228 + + +**Q:微调/训练启动后,HCCL 报错,包含如下关键信息:** + +.. code-block:: shell + + RuntimeError: [ERROR] HCCL error in: torch_npu/csrc/distributed/ProcessGroupHCCL.cpp:64 + [ERROR] 2024-05-21-11:57:54 (PID:927000, Device:3, RankID:3) ERR02200 DIST call hccl api failed. + EJ0001: 2024-05-21-11:57:54.167.645 Failed to initialize the HCCP process. Reason: Maybe the last training process is running. + Solution: Wait for 10s after killing the last training process and try again. + TraceBack (most recent call last): + tsd client wait response fail, device response code[1]. 
unknown device error.[FUNC:WaitRsp][FILE:process_mode_manager.cpp][LINE:290] + Fail to get sq reg virtual addr, deviceId=3, sqId=40.[FUNC:Setup][FILE:stream.cc][LINE:1102] + stream setup failed, retCode=0x7020010.[FUNC:SyncGetDevMsg][FILE:api_impl.cc][LINE:4643] + Sync get device msg failed, retCode=0x7020010.[FUNC:GetDevErrMsg][FILE:api_impl.cc][LINE:4704] + rtGetDevMsg execute failed, reason=[driver error:internal error][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53] + +A:杀掉 device 侧所有进程,等待 10s 后重新启动训练。 + +关联 issues: + +- https://github.com/hiyouga/LLaMA-Factory/issues/3839 + +.. **Q:微调 ChatGLM3 使用 fp16 报错 Gradient overflow. Skipping step Loss scaler reducing loss scale to ...;使用 bf16 时 'loss': 0.0, 'grad_norm': nan** +.. https://github.com/hiyouga/LLaMA-Factory/issues/3308 + + +**Q:使用 TeleChat 模型在昇腾 NPU 推理时,报错 AssertionError: Torch not compiled with CUDA enabled** + +A:此问题一般由代码中包含 cuda 相关硬编码造成,根据报错信息,找到 cuda 硬编码所在位置,对应修改为 NPU 代码。如 ``.cuda()`` 替换为 ``.npu()`` ; ``.to("cuda")`` 替换为 ``.to("npu")`` + +**Q:模型微调遇到报错 DeviceType must be NPU. Actual DeviceType is: cpu,例如下列报错信息** + +.. code-block:: shell + + File "/usr/local/pyenv/versions/3.10.13/envs/x/lib/python3.10/site-packages/transformers-4.41.1-py3.10.egg/transformers/generation/utils.py", line 1842, in generate + result = self._sample( + File "/usr/local/pyenv/versions/3.10.13/envs/x/lib/python3.10/site-packages/transformers-4.41.1-py3.10.egg/transformers/generation/utils.py", line 2568, in _sample + next_tokens = next_tokens * unfinished_sequences + \ + RuntimeError: t == c10::DeviceType::PrivateUse1 INTERNAL ASSERT FAILED at "third_party/op-plugin/op_plugin/ops/base_ops/opapi/MulKernelNpuOpApi.cpp":26, please report a bug to PyTorch. DeviceType must be NPU. Actual DeviceType is: cpu + [ERROR] 2024-05-29-17:04:48 (PID:70209, Device:0, RankID:-1) ERR00001 PTA invalid parameter + +A:此类报错通常为部分 Tensor 未放到 NPU 上,请确保报错中算子所涉及的操作数均在 NPU 上。如上面的报错中,MulKernelNpuOpApi 算子为乘法算子,应确保 next_tokens 和 unfinished_sequences 均已放在 NPU 上。 + + +.. **Q:单卡 NPU 情况下,使用 DeepSpeed 训练模型,报错 AttributeError :'GemmaForCausalLM'obiect has no attribute"save checkpoint",此处 GemmaForCausalLM 还可能为其他模型,详细报错如下图** + +**Q:单卡 NPU 情况下,使用 DeepSpeed 训练模型,报错 AttributeError :'GemmaForCausalLM'obiect has no attribute"save checkpoint",此处 GemmaForCausalLM 还可能为其他模型** + +.. .. figure:: ./images/lf-bugfix.png +.. :align: center + +A:此问题一般为使用 ``python src/train.py`` 启动训练脚本或使用 ``llamafactory-cli train`` 的同时设置环境变量 ``FORCE_TORCHRUN`` 为 false 或 0 时出现。 +由于 DeepSpeed 只对分布式 launcher 启动的程序中的模型用 ``DeepSpeedEngine`` 包装,包装后才有 ``save_checkpoint`` 等方法。 +因此使用 ``torchrun`` 启动训练即可解决问题,即: + +.. code-block:: shell + + torchrun --nproc_per_node $NPROC_PER_NODE \ + --nnodes $NNODES \ + --node_rank $RANK \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + src/train.py + +同时使用 ``llamafactory-cli train`` 和 DeepSpeed 时,LLaMA-Factory 将自动设置 ``FORCE_TORCHRUN`` 为 1,启动分布式训练。如果您的代码中没有这个功能,请更新 LLaMA-Factory 为最新代码。 + +关联 issue 及 PR: + +- https://github.com/hiyouga/LLaMA-Factory/issues/4077 +- https://github.com/hiyouga/LLaMA-Factory/pull/4082 + + + +问题反馈 +---------- + +如果您遇到任何问题,欢迎在 `官方社区 `_ 提 issue,或在 `LLAMA-Factory × 昇腾交流群 `_ 内提问,我们将第一时间进行响应。 + +*持续更新中 ...* + diff --git a/_sources/sources/llamafactory/index.rst.txt b/_sources/sources/llamafactory/index.rst.txt new file mode 100644 index 0000000..b45f005 --- /dev/null +++ b/_sources/sources/llamafactory/index.rst.txt @@ -0,0 +1,11 @@ +LLaMA-Factory +============== + +.. 
toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst + multi_npu.rst + example.rst + faq.rst diff --git a/_sources/sources/llamafactory/install.rst.txt b/_sources/sources/llamafactory/install.rst.txt new file mode 100644 index 0000000..69aaf63 --- /dev/null +++ b/_sources/sources/llamafactory/install.rst.txt @@ -0,0 +1,166 @@ +安装指南 +============== + +本教程面向使用 LLAMA-Factory & 昇腾的开发者,帮助完成昇腾环境下 LLaMA-Factory 的安装。 + +LLAMA-Factory 下载安装 +--------------------------- + +下载 LLAMA-Factory 并进入项目目录,本文档所有操作均在该目录下进行: + +.. code-block:: shell + :linenos: + + git clone https://github.com/hiyouga/LLaMA-Factory.git + cd LLaMA-Factory + +此处提供 docker 和 pip 两种安装方式,请按需选择: + +.. raw:: html + + +
+使用 pip
+~~~~~~~~~~~~~~
+
+.. note::
+
+   请确保已经根据快速安装昇腾环境指引安装了对应的 CANN-toolkit 版本以及相应的固件和驱动,并应用了 CANN-toolkit 环境变量。
+
+.. warning::
+
+   LLAMA-Factory 支持的 CANN 最低版本为 8.0.rc1。安装 CANN 时,请同时安装 Kernel 算子包。
+
+**Python 环境创建**
+
+创建并激活 Python 环境:
+
+.. code-block:: shell
+
+   conda create -y -n llamafactory python=3.10
+   conda activate llamafactory
+
+**LLaMA-Factory 安装**
+
+使用以下指令安装带有 torch-npu 的 LLaMA-Factory:
+
+.. code-block:: shell
+
+   pip install -e ".[torch-npu,metrics]" -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+使用 Docker
+~~~~~~~~~~~~~~
+
+.. note::
+
+   请确保已经根据快速安装昇腾环境指引安装了对应的固件和驱动。
+
+.. hint::
+
+   更多 CANN 的基础镜像选择见 ascendai/cann。
+
+此处提供使用 docker-compose 构建及启动 docker 容器和不使用 docker-compose 两种构建方式,请根据需求选择其一。
+
+**使用 docker-compose 构建及启动 docker 容器**
+
+进入存放 Dockerfile 及 docker-compose.yaml 的 docker-npu 目录:
+
+.. code-block:: shell
+
+   cd docker/docker-npu
+
+构建 docker 镜像并启动 docker 容器:
+
+.. code-block:: shell
+
+   docker-compose up -d
+
+进入 docker 容器:
+
+.. code-block:: shell
+
+   docker exec -it llamafactory bash
+
+**不使用 docker-compose**
+
+构建 docker 镜像:
+
+.. code-block:: shell
+
+   docker build -f ./docker/docker-npu/Dockerfile --build-arg INSTALL_DEEPSPEED=false --build-arg PIP_INDEX=https://pypi.org/simple -t llamafactory:latest .
+
+启动 docker 容器:
+
+.. code-block:: shell
+
+   docker run -dit \
+       -v ./hf_cache:/root/.cache/huggingface \
+       -v ./ms_cache:/root/.cache/modelscope \
+       -v ./data:/app/data \
+       -v ./output:/app/output \
+       -v /usr/local/dcmi:/usr/local/dcmi \
+       -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
+       -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
+       -v /etc/ascend_install.info:/etc/ascend_install.info \
+       -p 7860:7860 \
+       -p 8000:8000 \
+       --device /dev/davinci0 \
+       --device /dev/davinci_manager \
+       --device /dev/devmm_svm \
+       --device /dev/hisi_hdc \
+       --shm-size 16G \
+       --name llamafactory \
+       llamafactory:latest
+
+进入 docker 容器:
+
+.. code-block:: shell
+
+   docker exec -it llamafactory bash
+
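+进入容器后,可先确认容器内能否正常识别 NPU(以下为示例检查命令,假设 npu-smi 已按上文方式挂载进容器):
+
+.. code-block:: shell
+
+   # 查看容器内可见的昇腾 NPU 设备及其状态
+   npu-smi info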
+
+ + + +安装校验 +---------------------- + +使用以下指令对 LLaMA-Factory × 昇腾的安装进行校验: + +.. code-block:: shell + + llamafactory-cli env + +如下所示,正确显示 LLaMA-Factory、PyTorch NPU 和 CANN 版本号及 NPU 型号等信息即说明安装成功。 + +.. code-block:: shell + + - `llamafactory` version: 0.8.2.dev0 + - Platform: Linux-4.19.90-vhulk2211.3.0.h1543.eulerosv2r10.aarch64-aarch64-with-glibc2.31 + - Python version: 3.10.14 + - PyTorch version: 2.1.0 (NPU) + - Transformers version: 4.41.2 + - Datasets version: 2.19.2 + - Accelerate version: 0.31.0 + - PEFT version: 0.11.1 + - TRL version: 0.9.4 + - NPU type: xxx + - CANN version: 8.0.RC2.alpha001 + +LLaMA-Factory 卸载 +---------------------- + +.. code-block:: shell + :linenos: + + pip uninstall llamafactory diff --git a/_sources/sources/llamafactory/multi_npu.rst.txt b/_sources/sources/llamafactory/multi_npu.rst.txt new file mode 100644 index 0000000..b1f953a --- /dev/null +++ b/_sources/sources/llamafactory/multi_npu.rst.txt @@ -0,0 +1,55 @@ +单机多卡微调 +============== + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 LLaMA-Factory ! + +本篇为 :doc:`快速开始 <./quick_start>` 的进阶,同样首先安装 DeepSpeed 和 ModelScope: + +.. code-block:: + + pip install -e ".[deepspeed,modelscope]" -i https://pypi.tuna.tsinghua.edu.cn/simple + +多卡 NPU 指定 +-------------------------- + +使用 ``export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3`` 指定所需 NPU 卡号,此处为 0~3 四卡 NPU。 + +.. note:: + + 昇腾 NPU 卡从 0 开始编号,docker 容器内也是如此; + + 如映射物理机上的 6,7 号 NPU 卡到容器内使用,其对应的卡号分别为 0,1 + + +或使用以下脚本自动检测并指定多卡 NPU: + +.. code-block:: shell + + # ------------------------------ detect npu -------------------------------------- + # detect npu via npu-smi + if command -v npu-smi info &> /dev/null; then + num_npus=$(npu-smi info -l | grep "Total Count" | awk -F ":" '{print $NF}') + npu_list=$(seq -s, 0 $((num_npus-1))) + else + num_npus=-1 + npu_list="-1" + fi + echo using npu : $npu_list + num_gpus=$(echo $npu_list | awk -F "," '{print NF}') + # -------------------------------------------------------------------------------- + export ASCEND_RT_VISIBLE_DEVICES=$npu_list + +基于 LoRA 的模型多卡分布式微调 +------------------------------- + +通过 ``ASCEND_RT_VISIBLE_DEVICES`` 变量指定多卡后,使用 torchrun 启动分布式训练,需指定 ``nproc_per_node`` 参数为 NPU 卡数量,其余参数配置与 :doc:`快速开始 <./quick_start>` 中单卡微调保持一致 + +.. code-block:: shell + + torchrun --nproc_per_node $num_npus \ + --nnodes 1 \ + --node_rank 0 \ + --master_addr 127.0.0.1 \ + --master_port 7007 \ + src/train.py /qwen1_5_lora_sft_ds.yaml diff --git a/_sources/sources/llamafactory/quick_start.rst.txt b/_sources/sources/llamafactory/quick_start.rst.txt new file mode 100644 index 0000000..ceaafb5 --- /dev/null +++ b/_sources/sources/llamafactory/quick_start.rst.txt @@ -0,0 +1,160 @@ +快速开始 +============ + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 LLaMA-Factory ! + +本教程聚焦大语言模型(Large Language Model,LLM)的微调过程,以 Qwen1.5-7B 模型为例,讲述如何使用 LLaMA-Factory 在昇腾 NPU 上进行 LoRA 微调及推理。 + +本篇将使用到 DeepSpeed 和 ModelScope,请使用以下指令安装: + +.. code-block:: + + pip install -e ".[deepspeed,modelscope]" -i https://pypi.tuna.tsinghua.edu.cn/simple + +环境变量配置 +------------- + +通过环境变量设置单卡 NPU,并使用 ModelScope 下载模型/数据集: + +.. code-block:: shell + + export ASCEND_RT_VISIBLE_DEVICES=0 + export USE_MODELSCOPE_HUB=1 + +基于 LoRA 的模型微调 +------------------------ + + +.. _qwen_yaml: + +yaml 配置文件 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +在 LLAMA-Factory 目录下,创建如下 qwen1_5_lora_sft_ds.yaml: + +.. raw:: html + +
+.. code-block:: yaml
+
+   ### model
+   model_name_or_path: qwen/Qwen1.5-7B
+
+   ### method
+   stage: sft
+   do_train: true
+   finetuning_type: lora
+   lora_target: q_proj,v_proj
+
+   ### ddp
+   ddp_timeout: 180000000
+   deepspeed: examples/deepspeed/ds_z0_config.json
+
+   ### dataset
+   dataset: identity,alpaca_en_demo
+   template: qwen
+   cutoff_len: 1024
+   max_samples: 1000
+   overwrite_cache: true
+   preprocessing_num_workers: 16
+
+   ### output
+   output_dir: saves/Qwen1.5-7B/lora/sft
+   logging_steps: 10
+   save_steps: 500
+   plot_loss: true
+   overwrite_output_dir: true
+
+   ### train
+   per_device_train_batch_size: 1
+   gradient_accumulation_steps: 2
+   learning_rate: 0.0001
+   num_train_epochs: 3.0
+   lr_scheduler_type: cosine
+   warmup_steps: 0.1
+   fp16: true
+
+   ### eval
+   val_size: 0.1
+   per_device_eval_batch_size: 1
+   evaluation_strategy: steps
+   eval_steps: 500
+
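+除下文使用 torchrun 启动微调外,也可以直接使用 llamafactory-cli 读取该配置文件启动训练(示例,假设已按安装教程完成 LLaMA-Factory 安装;按本文档 FAQ 所述,较新版本与 DeepSpeed 搭配时会自动设置 FORCE_TORCHRUN):
+
+.. code-block:: shell
+
+   # 读取上述 yaml 配置启动 LoRA 微调
+   llamafactory-cli train qwen1_5_lora_sft_ds.yaml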
+
+ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +开启微调 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +使用 torchrun 启动微调,微调涉及的所有参数均在 :ref:`qwen_yaml` 中设置。 + +.. code-block:: shell + + torchrun --nproc_per_node 1 \ + --nnodes 1 \ + --node_rank 0 \ + --master_addr 127.0.0.1 \ + --master_port 7007 \ + src/train.py qwen1_5_lora_sft_ds.yaml + +.. note:: + + ``nproc_per_node, nnodes, node_rank, master_addr, master_port`` 为 torchrun 所需参数,其详细含义可参考 `PyTorch 官方文档 `_。 + +如正常输出模型加载、损失 loss 等日志,即说明成功微调。如需NPU 多卡分布式训练请参考 :doc:`单机多卡微调 <./multi_npu>` + +动态合并 LoRA 的推理 +--------------------- + +经 LoRA 微调后,通过 ``llamafactory-cli chat`` 使用微调后的模型进行推理,指定 ``adapter_name_or_path`` 参数为 LoRA 微调模型的存储路径: + +.. code-block:: shell + + llamafactory-cli chat --model_name_or_path qwen/Qwen1.5-7B \ + --adapter_name_or_path saves/Qwen1.5-7B/lora/sft \ + --template qwen \ + --finetuning_type lora + +.. note:: + 确保微调及推理阶段使用同一 prompt 模板 ``template`` + +接下来即可在终端使用微调的模型进行问答聊天了!使用 ``Ctrl+C`` 或输入 ``exit`` 退出该问答聊天,如下图所示,为在 NPU 成功推理的样例: + +.. figure:: ./images/chat-llamafactory.gif + :align: center + +.. note:: + 第一轮问答会有一些 warning 告警,这是由于 transformers 库更新所致,不影响推理的正常运行,请忽略 + +完整脚本 +----------- + +推理及微调脚本 +~~~~~~~~~~~~~~~~ + +使用 Qwen1.5-7B 模型微调和推理的完整脚本如下: + +.. code-block:: shell + + # use modelscope + export USE_MODELSCOPE_HUB=1 + + # specify NPU + export ASCEND_RT_VISIBLE_DEVICES=0 + + ### qwen/Qwen1.5-7B + ### finetune + torchrun --nproc_per_node 1 \ + --nnodes 1 \ + --node_rank 0 \ + --master_addr 127.0.0.1 \ + --master_port 7007 \ + src/train.py /qwen1_5_lora_sft_ds.yaml + + ### inference -- chat + llamafactory-cli chat --model_name_or_path qwen/Qwen1.5-7B \ + --adapter_name_or_path saves/Qwen1.5-7B/lora/sft \ + --template qwen \ + --finetuning_type lora diff --git a/_sources/sources/lm_deploy/index.rst.txt b/_sources/sources/lm_deploy/index.rst.txt new file mode 100644 index 0000000..9bb56a9 --- /dev/null +++ b/_sources/sources/lm_deploy/index.rst.txt @@ -0,0 +1,10 @@ +LMDeploy +=========== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst + + diff --git a/_sources/sources/lm_deploy/install.rst.txt b/_sources/sources/lm_deploy/install.rst.txt new file mode 100644 index 0000000..242e7c8 --- /dev/null +++ b/_sources/sources/lm_deploy/install.rst.txt @@ -0,0 +1,51 @@ +安装指南 +============== + +LMDeploy 是一个用于大型语言模型(LLMs)和视觉-语言模型(VLMs)压缩、部署和服务的 Python 库。其核心推理引擎包括 TurboMind 引擎和 PyTorch 引擎, +前者由 C++ 和 CUDA 开发,致力于推理性能的优化,而后者纯 Python 开发,旨在降低开发者的门槛。 + +本教程面向使用 lm-deploy & 昇腾的开发者,帮助完成昇腾环境下 lm-deploy 的安装。 + + +lm_deploy 下载安装 +--------------------------- + +使用 pip 安装(推荐) +++++++++++++++++++++++++++ + +推荐在一个干净的 conda 环境下(python3.8 - 3.12),安装 lmdeploy : + +.. code-block:: shell + :linenos: + + conda create -n lmdeploy python=3.8 -y + conda activate lmdeploy + pip install lmdeploy + + +从源码安装 ++++++++++++++++++++++++++++++++ + +如果你使用 PyTorch 引擎进行推理,从源代码安装非常简单: + +.. code-block:: shell + :linenos: + + git clone https://github.com/InternLM/lmdeploy.git + cd lmdeploy + pip install -e . + +安装校验 +----------------- + +安装过程中未出现错误,且执行下面命令后出现 lmdeploy 版本号即为安装成功。 + +.. 
code-block:: shell + :linenos: + + python -c "import lmdeploy; print(lmdeploy.__version__)" + + # 以下为输出示例 + # 0.6.2 + + diff --git a/_sources/sources/lm_deploy/quick_start.rst.txt b/_sources/sources/lm_deploy/quick_start.rst.txt new file mode 100644 index 0000000..730ed63 --- /dev/null +++ b/_sources/sources/lm_deploy/quick_start.rst.txt @@ -0,0 +1,161 @@ +快速开始 +========================= + +我们基于 LMDeploy 的 PytorchEngine,增加了华为昇腾设备(Atlas 800T A2)的支持。所以,在华为昇腾上使用 LMDeploy 的方法与在英伟达 GPU 上使用 PytorchEngine 后端的方法几乎相同。在阅读本教程之前,请先阅读原版的 `快速开始 `_ 。 + +安装 +----- + +我们强烈建议用户构建一个 Docker 镜像以简化环境设置。 +克隆 lmdeploy 的源代码,Dockerfile 位于 docker 目录中。 + +.. code-block:: shell + :linenos: + + git clone https://github.com/InternLM/lmdeploy.git + cd lmdeploy + +环境准备 +-------- + +Docker 版本应不低于 18.03。并且需按照 `官方指南 `_ 安装 Ascend Docker Runtime。 + + +.. note:: + + 如果在后续容器内出现 `libascend_hal.so: cannot open shared object file` 错误,说明 Ascend Docker Runtime 没有被正确安装。 + +Drivers,Firmware 和 CANN +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +目标机器需安装华为驱动程序和固件版本至少为 23.0.3,请参考 +`CANN 驱动程序和固件安装 `_ +和 `下载资源 `_ 。 + +另外,**docker/Dockerfile_aarch64_ascend** 没有提供CANN 安装包,用户需要自己从 `昇腾资源下载中心 `_ 下载 CANN(version 8.0.RC2.beta1)软件包。 +并将 **Ascend-cann-kernels-910b*.run** ,**Ascend-cann-nnal_*.run** 和 **Ascend-cann-toolkit*.run** 放在 lmdeploy 源码根目录下。 + +构建镜像 +~~~~~~~~~~~~~~~~~~ + +请在 lmdeploy 源代码根目录下执行以下镜像构建命令,CANN 相关的安装包也放在此目录下。 + +.. code-block:: shell + :linenos: + + DOCKER_BUILDKIT=1 docker build -t lmdeploy-aarch64-ascend:latest \ + -f docker/Dockerfile_aarch64_ascend . + +如果以下命令执行没有任何错误,这表明环境设置成功。 + +.. code-block:: shell + :linenos: + + docker run -e ASCEND_VISIBLE_DEVICES=0 --rm --name lmdeploy -t lmdeploy-aarch64-ascend:latest lmdeploy check_env + + +关于在昇腾设备上运行 `docker run` 命令的详情,请参考这篇 `文档 `_ 。 + +离线批处理 +---------- + +.. note:: + + 图模式已经支持了 Atlas 800T A2。目前,单卡下的 LLaMa3-8B/LLaMa2-7B/Qwen2-7B 已经通过测试。用户可以设定 `eager_mode=False` 来开启图模式,或者设定 `eager_mode=True` 来关闭图模式。(启动图模式需要事先 source `/usr/local/Ascend/nnal/atb/set_env.sh`) + +LLM 推理 +~~~~~~~~~ + +将 `device_type="ascend"` 加入 `PytorchEngineConfig` 的参数中。 + +.. code-block:: python + :linenos: + + from lmdeploy import pipeline + from lmdeploy import PytorchEngineConfig + if __name__ == "__main__": + pipe = pipeline("internlm/internlm2_5-7b-chat", + backend_config=PytorchEngineConfig(tp=1, device_type="ascend", eager_mode=True)) + question = ["Shanghai is", "Please introduce China", "How are you?"] + response = pipe(question) + print(response) + +VLM 推理 +~~~~~~~~~ + +将 `device_type="ascend"` 加入 `PytorchEngineConfig` 的参数中。 + +.. code-block:: python + :linenos: + + from lmdeploy import pipeline, PytorchEngineConfig + from lmdeploy.vl import load_image + if __name__ == "__main__": + pipe = pipeline('OpenGVLab/InternVL2-2B', + backend_config=PytorchEngineConfig(tp=1, device_type='ascend', eager_mode=True)) + image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg') + response = pipe(('describe this image', image)) + print(response) + +在线服务 +--------- + +.. note:: + + 图模式已经支持 Atlas 800T A2。目前,单卡下的 InternLM2-7B/LLaMa2-7B/Qwen2-7B 已经通过测试。 + 在线服务时,图模式默认开启,用户可以添加 `--eager-mode` 来关闭图模式。(启动图模式需要事先 source `/usr/local/Ascend/nnal/atb/set_env.sh` ) + +LLM 模型服务 +~~~~~~~~~~~~~ + +将 `--device ascend` 加入到服务启动命令中。 + +.. code-block:: shell + :linenos: + + lmdeploy serve api_server --backend pytorch --device ascend --eager-mode internlm/internlm2_5-7b-chat + + +VLM 模型服务 +~~~~~~~~~~~~~ + +将 `--device ascend` 加入到服务启动命令中。 + +.. 
code-block:: shell + :linenos: + + lmdeploy serve api_server --backend pytorch --device ascend --eager-mode OpenGVLab/InternVL2-2B + + +使用命令行与LLM模型对话 +-------------------------------------- + +将 `--device ascend` 加入到服务启动命令中。 + +.. code-block:: shell + :linenos: + + lmdeploy chat internlm/internlm2_5-7b-chat --backend pytorch --device ascend --eager-mode + + +也可以运行以下命令使启动容器后开启 lmdeploy 聊天 + +.. code-block:: shell + :linenos: + + docker exec -it lmdeploy_ascend_demo \ + bash -i -c "lmdeploy chat --backend pytorch --device ascend --eager-mode internlm/internlm2_5-7b-chat" + + +量化 +---- + +运行下面的代码可以在 Atlas 800T A2 上对权重进行 W4A16 量化。 + +.. code-block:: shell + :linenos: + + lmdeploy lite auto_awq $HF_MODEL --work-dir $WORK_DIR --device npu + + +支持的模型列表请参考 `支持的模型 `_ 。 diff --git a/_sources/sources/lm_evaluation/index.rst.txt b/_sources/sources/lm_evaluation/index.rst.txt new file mode 100644 index 0000000..22e8f69 --- /dev/null +++ b/_sources/sources/lm_evaluation/index.rst.txt @@ -0,0 +1,8 @@ +LM-Evalution-Harness +=============================== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/_sources/sources/lm_evaluation/install.rst.txt b/_sources/sources/lm_evaluation/install.rst.txt new file mode 100644 index 0000000..79cecb2 --- /dev/null +++ b/_sources/sources/lm_evaluation/install.rst.txt @@ -0,0 +1,74 @@ +安装指南 +============== + +本教程面向使用lm-evaluation-harnes&昇腾的开发者,帮助完成昇腾环境下lm-evaluation-harness的安装。 + +.. note:: + + 请确保已经根据 `快速安装昇腾环境 <../ascend/quick_install.html>`_ 指引安装了对应的CANN-toolkit版本以及相应的固件和驱动,并应用了CANN-toolkit环境变量。 + +.. warning:: + + lm-evaluation-harness支持的CANN最低版本为8.0.rc1。安装CANN时,请同事安装Kernel算子包。 + + +lm-evaluation-harness安装 +---------------------------------- + +注意:lm-evaluation-harness从0.4.3开始原生支持昇腾。 + +- Option 1: Use the latest stable release + +.. code-block:: shell + :linenos: + + pip install --upgrade-strategy=conservative lm-eval + + +- Option 2: Use the latest main branch under development + +.. code-block:: shell + :linenos: + + pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git + + +安装校验 +---------------- + +使用以下指令对lm-evaluation-harness的安装进行校验: + +.. code-block:: shell + :linenos: + + lm-eval -h + + +如下所示,正确显示 `lm-eval` 命令的帮助信息即说明安装成功。 + +.. code-block:: shell + :linenos: + + usage: lm-eval [-h] [--model MODEL] [--tasks task1,task2] [--model_args MODEL_ARGS] [--num_fewshot N] + [--batch_size auto|auto:N|N] [--max_batch_size N] [--device DEVICE] + [--output_path DIR|DIR/file.json] [--limit N|0` 准备好昇腾环境! + +ONNX Runtime 安装 +------------------- + +ONNX Runtime 目前提供了 源码编译 和 二进制包 两种安装方式,其中二进制包当前只支持Python。 + +从源码安装 +^^^^^^^^^^^^ + +.. code-block:: shell + :linenos: + + # Default path, change it if needed. + source /usr/local/Ascend/ascend-toolkit/set_env.sh + + ./build.sh --config --build_shared_lib --parallel --use_cann + + +从pip安装 +^^^^^^^^^^^^ + +.. code-block:: shell + :linenos: + + pip3 install onnxruntime-cann diff --git a/_sources/sources/onnxruntime/quick_start.rst.txt b/_sources/sources/onnxruntime/quick_start.rst.txt new file mode 100644 index 0000000..60cf448 --- /dev/null +++ b/_sources/sources/onnxruntime/quick_start.rst.txt @@ -0,0 +1,97 @@ +快速开始 +=========== + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及 ONNX Runtime! + +本教程以一个简单的 resnet50 模型为例,讲述如何在 Ascend NPU上使用 ONNX Runtime 进行模型推理。 + +环境准备 +----------- + +安装本教程所依赖的额外必要库。 + +.. code-block:: shell + :linenos: + + pip install numpy Pillow onnx + +模型准备 +----------- + +ONNX Runtime 推理需要 ONNX 格式模型作为输入,目前有以下几种主流途径获得 ONNX 模型。 + +1. 
从 `ONNX Model Zoo `_ 中下载模型。 +2. 从 torch、TensorFlow 等框架导出 ONNX 模型。 +3. 使用转换工具,完成其他类型到 ONNX 模型的转换。 + +本教程使用的 resnet50 模型是从 ONNX Model Zoo 中直接下载的,具体的 `下载链接 `_ + +类别标签 +----------- + +类别标签用于将输出权重转换成人类可读的类别信息,具体的 `下载链接 `_ + +模型推理 +----------- + +.. code-block:: python + :linenos: + + import onnxruntime as ort + import numpy as np + import onnx + from PIL import Image + + def preprocess(image_path): + img = Image.open(image_path) + img = img.resize((224, 224)) + img = np.array(img).astype(np.float32) + + img = np.transpose(img, (2, 0, 1)) + img = img / 255.0 + mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1) + std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1) + img = (img - mean) / std + img = np.expand_dims(img, axis=0) + return img + + def inference(model_path, img): + options = ort.SessionOptions() + providers = [ + ( + "CANNExecutionProvider", + { + "device_id": 0, + "arena_extend_strategy": "kNextPowerOfTwo", + "npu_mem_limit": 2 * 1024 * 1024 * 1024, + "op_select_impl_mode": "high_performance", + "optypelist_for_implmode": "Gelu", + "enable_cann_graph": True + }, + ), + "CPUExecutionProvider", + ] + + session = ort.InferenceSession(model_path, sess_options=options, providers=providers) + input_name = session.get_inputs()[0].name + output_name = session.get_outputs()[0].name + + result = session.run([output_name], {input_name: img}) + return result + + def display(classes_path, result): + with open(classes_path) as f: + labels = [line.strip() for line in f.readlines()] + + pred_idx = np.argmax(result) + print(f'Predicted class: {labels[pred_idx]} ({result[0][0][pred_idx]:.4f})') + + if __name__ == '__main__': + model_path = '~/model/resnet/resnet50.onnx' + image_path = '~/model/resnet/cat.jpg' + classes_path = '~/model/resnet/imagenet_classes.txt' + + img = preprocess(image_path) + result = inference(model_path, img) + display(classes_path, result) diff --git a/_sources/sources/open_clip/index.rst.txt b/_sources/sources/open_clip/index.rst.txt new file mode 100644 index 0000000..7ea82e4 --- /dev/null +++ b/_sources/sources/open_clip/index.rst.txt @@ -0,0 +1,8 @@ +open_clip +=========== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/_sources/sources/open_clip/install.rst.txt b/_sources/sources/open_clip/install.rst.txt new file mode 100644 index 0000000..48dec99 --- /dev/null +++ b/_sources/sources/open_clip/install.rst.txt @@ -0,0 +1,70 @@ +安装指南 +============== + +本教程面向使用 open_clip & 昇腾的开发者,帮助完成昇腾环境下 open_clip 的安装。 + +昇腾环境安装 +------------ + +请根据已有昇腾产品型号及CPU架构等按照 :doc:`快速安装昇腾环境指引 <../ascend/quick_install>` 进行昇腾环境安装。 + +.. warning:: + CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。 + +Python 环境创建 +---------------------- + +.. code-block:: shell + :linenos: + + # 创建 python 3.10 的虚拟环境 + conda create -y -n openclip python=3.10 + # 激活虚拟环境 + conda activate openclip + + +open_clip 安装 +---------------------- + +使用以下指令安装 open_clip: + +.. code-block:: shell + :linenos: + + pip install open-clip-torch -i https://pypi.tuna.tsinghua.edu.cn/simple + +torch-npu 安装 +---------------------- + +按照 :doc:`torch-npu 安装指引 <../pytorch/install>` 安装 2.2.0 版本 torch 和 torch-npu,或使用以下指令快速安装: + +.. 
code-block:: shell + :linenos: + + # install the dependencies + pip3 install attrs numpy==1.26.4 decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions -i https://pypi.tuna.tsinghua.edu.cn/simple + # install torch and torch-npu + pip install torch==2.2.0 torch-npu==2.2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple + +安装校验 +---------------------- + +使用以下 Python 脚本对 open_clip 的安装进行校验,正确打印 open_clip 的版本号和 NPU 卡号说明安装成功。 + +.. code-block:: python + :linenos: + :emphasize-lines: 2 + + import torch + import torch_npu + import open_clip + + print("open_cliop version: ", clip.version.__version__) + print("NPU devices: ", torch.npu.current_device()) + +正确回显如下(单卡 NPU 环境): + +.. code-block:: shell + + open_cliop version: 2.24.0 + NPU devices: 0 diff --git a/_sources/sources/open_clip/quick_start.rst.txt b/_sources/sources/open_clip/quick_start.rst.txt new file mode 100644 index 0000000..8e54bed --- /dev/null +++ b/_sources/sources/open_clip/quick_start.rst.txt @@ -0,0 +1,175 @@ +快速开始 +================== + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 open_clip ! + +本文档帮助昇腾开发者快速使用 open_clip × 昇腾 进行训练和推理。 + +使用 NPU 的训练 +--------------------- + +首先在 ``src/training/main.py`` 脚本导入 torch 后,导入 torch-npu,并将 cuda 对应的 ``GradScaler`` 替换为 npu 的: + +.. code-block:: python + :linenos: + + import torch + import torch-npu + from torch.npu.amp import GradScaler + +以 ``MS_COCO_2017_URL_TEXT`` 数据集的训练为例,使用在 ``DataComp`` 数据集训练过的 ``CLIP-ViT-B-32`` 模型权重为预训练权重,使用以下脚本启动单卡/多卡 NPU 上的训练: + +单卡训练 +~~~~~~~~~~~~~~~ + +.. note:: + + 请根据实际情况指定数据集路径 ``train-data`` 、 ``val-data`` 、 ``imagenet-val`` 和预训练模型路径 ``pretrained`` + +.. code-block:: shell + :linenos: + + python -m training.main \ + --model ViT-B-32 \ + --save-frequency 1 \ + --zeroshot-frequency 1 \ + --report-to tensorboard \ + --train-data="./data/MS_COCO_2017_URL_TEXT/traincoco.csv" \ + --val-data="./data/MS_COCO_2017_URL_TEXT/traincoco.csv" \ + --imagenet-val="./data/ImageNet-1000/val/" \ + --pretrained "./models/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K/open_clip_pytorch_model.bin" \ + --warmup 10000 \ + --batch-size=128 \ + --lr=1e-3 \ + --wd=0.1 \ + --epochs=8 \ + --workers=8 \ + --seed 0 + +分布式训练 +~~~~~~~~~~~~~~~ + +使用 torchrun 启动 NPU 分布式训练,需指定通信后端为 hccl(``--dist-backend="hccl"``): + +.. note:: + + 请根据实际情况指定数据集路径 ``train-data`` 、 ``val-data`` 、 ``imagenet-val`` 和预训练模型路径 ``pretrained`` + + ``nproc_per_node`` 需指定为每个节点卡的数量,为 torchrun 所需参数,更多 torchrun 相关参数详细含义可参考 `PyTorch 官方文档 `_。 + + +.. code-block:: shell + :linenos: + + # train on multi-npu + torchrun --nproc_per_node 2 -m training.main \ + --save-frequency 1 \ + --zeroshot-frequency 1 \ + --report-to tensorboard \ + --train-data="./data/MS_COCO_2017_URL_TEXT/traincoco.csv" \ + --val-data="./data/MS_COCO_2017_URL_TEXT/traincoco.csv" \ + --imagenet-val="./data/ImageNet-1000/val/" \ + --pretrained "./models/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K/open_clip_pytorch_model.bin" \ + --warmup 10000 \ + --batch-size=64 \ + --lr=1e-3 \ + --wd=0.1 \ + --epochs=1 \ + --workers=8 \ + --seed 0 \ + --model ViT-B-32 \ + --dist-backend="hccl" + +使用 NPU 的推理 +--------------------- + +一般而言,自定义脚本中使用 open_clip 在昇腾上训练,需要导入 torch-npu,并将数据和模型放到 NPU 上,如下样例所示: + +.. note:: + + 请根据实际情况替换模型缓存路径 ``/path/to/modelsViT-B-32/`` 、 ``/path/to/models/ViT-B-32/ViT-B-32.pt`` 、 ``/path/to/your/image.jpg`` + +.. 
code-block:: python + :linenos: + :emphasize-lines: 2,14,15,16,18 + + import torch + import torch_npu + from PIL import Image + import open_clip as clip + + # 下载模型至指定缓存路径 + model = clip.openai.load_openai_model('ViT-B-32', cache_dir="/path/to/modelsViT-B-32/") + + model, _, preprocess = clip.create_model_and_transforms('ViT-B-32', pretrained='/path/to/models/ViT-B-32/ViT-B-32.pt') + tokenizer = clip.get_tokenizer('ViT-B-32') + + # put inputs and model to npu + image = preprocess(Image.open("/path/to/your/image.jpg")).unsqueeze(0).to("npu") + text = tokenizer(["a diagram", "a dog", "a cat"]).to("npu") + model = model.to("npu") + + with torch.no_grad(), torch.npu.amp.autocast(): + image_features = model.encode_image(image) + text_features = model.encode_text(text) + image_features /= image_features.norm(dim=-1, keepdim=True) + text_features /= text_features.norm(dim=-1, keepdim=True) + + text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1) + + print("Label probs:", text_probs) # prints: [[1., 0., 0.]] + +本示例所用输入图像: + +.. figure:: ./images/CLIP.png + :align: center + :scale: 50% + +对应输出以下内容,正确预测其分类为 a dog: + +.. code-block:: shell + + Label probs: tensor([[0.0010, 0.9941, 0.0049]], device='npu:0') + +模型评估 +------------------ + +在 ``src/training/profiler.py`` 脚本导入 torch-npu,并将模型放到 NPU 上: + +.. code-block:: python + :linenos: + :emphasize-lines: 4,16,17 + + import argparse + + import torch + import torch_npu + + import open_clip + import pandas as pd + from torch.utils.flop_counter import FlopCounterMode + + ... ... + + def profile_model(model_name, batch_size=1, profiler='torch'): + model.eval() + if torch.cuda.is_available(): + model = model.cuda() + elif torch.npu.is_available(): + model = model.npu() + +使用以下指令完成模型评估: + +.. code-block:: shell + :linenos: + + python3 -m training.profiler --model ViT-L-14 --results-file "./logs/profiler_results.csv" + +评估结果保存在 ``./logs/profiler_results.csv`` 文件中: + +.. code-block:: shell + + model,image_size,image_width,text_width,embed_dim,mparams,image_mparams,text_mparams,gflops,image_gflops,text_gflops + ViT-L-14,224,1024,768,768,427.62,303.97,123.65,175.33,162.03,13.3 + diff --git a/_sources/sources/opencompass/index.rst.txt b/_sources/sources/opencompass/index.rst.txt new file mode 100644 index 0000000..44e2476 --- /dev/null +++ b/_sources/sources/opencompass/index.rst.txt @@ -0,0 +1,8 @@ +OpenCompass +============ + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/_sources/sources/opencompass/install.rst.txt b/_sources/sources/opencompass/install.rst.txt new file mode 100644 index 0000000..67dadb6 --- /dev/null +++ b/_sources/sources/opencompass/install.rst.txt @@ -0,0 +1,81 @@ +安装指南 +============== + +本教程面向使用 OpenCompass & 昇腾的开发者,帮助完成昇腾环境下 OpenCompass 的安装。 + +昇腾环境安装 +------------ + +请根据已有昇腾产品型号及CPU架构等按照 :doc:`快速安装昇腾环境指引 <../ascend/quick_install>` 进行昇腾环境安装。 + +.. warning:: + CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。 + +Python 环境创建 +---------------------- + +.. code-block:: shell + :linenos: + + # 创建 python 3.10 的虚拟环境 + conda create -y -n opencompass python=3.10 + # 激活虚拟环境 + conda activate opencompass + + +OpenCompass 安装 +---------------------- + +使用以下指令安装 OpenCompass: + +.. 
code-block:: shell + :linenos: + + pip install -U opencompass -i https://pypi.tuna.tsinghua.edu.cn/simple + + ## Full installation (with support for more datasets) + # pip install "opencompass[full]" + + ## Environment with model acceleration frameworks + ## Manage different acceleration frameworks using virtual environments + ## since they usually have dependency conflicts with each other. + # pip install "opencompass[lmdeploy]" + # pip install "opencompass[vllm]" + + ## API evaluation (i.e. Openai, Qwen) + # pip install "opencompass[api]" + +torch-npu 安装 +---------------------- + +按照 :doc:`torch-npu 安装指引 <../pytorch/install>` 安装 2.1.0 版本 torch 和 torch-npu,或使用以下指令快速安装: + +.. code-block:: shell + :linenos: + + # install the dependencies + pip3 install attrs numpy==1.26.4 decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions -i https://pypi.tuna.tsinghua.edu.cn/simple + # install torch and torch-npu + pip install torch==2.1.0 torch-npu==2.1.0 -i https://pypi.tuna.tsinghua.edu.cn/simple + +安装校验 +---------------------- + +使用以下 Python 脚本对 open_clip 的安装进行校验,正确打印 open_clip 的版本号和 NPU 卡号说明安装成功。 + +.. code-block:: python + :linenos: + :emphasize-lines: 2 + + import torch + import opencompass + + print("opencompass version: ", opencompass.__version__) + print("NPU devices: ", torch.npu.current_device()) + +正确回显如下(单卡 NPU 环境): + +.. code-block:: shell + + opencompass version: 0.3.3 + NPU devices: 0 diff --git a/_sources/sources/opencompass/quick_start.rst.txt b/_sources/sources/opencompass/quick_start.rst.txt new file mode 100644 index 0000000..547239d --- /dev/null +++ b/_sources/sources/opencompass/quick_start.rst.txt @@ -0,0 +1,170 @@ +快速开始 +================== + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 OpenCompass ! + +本文档帮助昇腾开发者快速使用 OpenCompass × 昇腾 进行训练和推理。 + +概览 +--------------------- + +在 OpenCompass 中评估一个模型通常包括以下几个阶段:配置 -> 推理 -> 评估 -> 可视化。 + +配置:这是整个工作流的起点。您需要配置整个评估过程,选择要评估的模型和数据集。此外,还可以选择评估策略、计算后端等,并定义显示结果的方式。 + +推理与评估:在这个阶段,OpenCompass 将会开始对模型和数据集进行并行推理和评估。推理阶段主要是让模型从数据集产生输出,而评估阶段则是衡量这些输出与标准答案的匹配程度。这两个过程会被拆分为多个同时运行的“任务”以提高效率,但请注意,如果计算资源有限,这种策略可能会使评测变得更慢。如果需要了解该问题及解决方案,可以参考 +`FAQ: 效率: `_ + +可视化:评估完成后,OpenCompass 将结果整理成易读的表格,并将其保存为 CSV 和 TXT 文件。你也可以激活飞书状态上报功能,此后可以在飞书客户端中及时获得评测状态报告。 + +接下来,我们将展示 OpenCompass 的基础用法,展示基座模型 ``InternLM2-1.8B`` 和对话模型 ``InternLM2-Chat-1.8B``、``Qwen2-1.5B-Instruct`` 在 GSM8K 和 MATH 下采样数据集上的评估。它们的配置文件可以在 ``configs/eval_chat_demo.py`` 和 ``configs/eval_base_demo.py`` 中找到。 + +在运行此实验之前,请确保您已在本地安装了 ``opencompass`` && ``torch-npu``。 + +本文参考: +`OpenCompass官方文档 `_ + + +配置评估任务 +~~~~~~~~~~~~~~~ + +.. note:: + + 在 OpenCompass 中,每个评估任务由待评估的模型和数据集组成。评估的入口点是 run.py。用户可以通过命令行或配置文件选择要测试的模型和数据集。 + +对于对话模型: + +.. code-block:: shell + :linenos: + + python run.py \ + --models hf_internlm2_chat_1_8b hf_qwen2_1_5b_instruct \ + --datasets demo_gsm8k_chat_gen demo_math_chat_gen \ + --debug + +对于基座模型: + +.. code-block:: shell + :linenos: + + python run.py \ + --models hf_internlm2_1_8b hf_qwen2_1_5b \ + --datasets demo_gsm8k_base_gen demo_math_base_gen \ + --debug + +.. 
list-table:: opencompass run.py 参数说明 + :widths: 15 30 25 + :header-rows: 1 + + * - 命令行参数 + - 描述 + - 样例数值 + * - --hf-type + - HuggingFace 模型类型,可选值为 chat 或 base + - chat + * - --hf-path + - HuggingFace 模型路径 + - internlm/internlm2-chat-1_8b + * - --model-kwargs + - 构建模型的参数 + - device_map=’auto’ + * - --tokenizer-path + - HuggingFace tokenizer 路径(如果与模型路径相同,可以省略) + - internlm/internlm2-chat-1_8b + * - --tokenizer-kwargs + - 构建 tokenizer 的参数 + - padding_side=’left’ truncation=’left’ trust_remote_code=True + * - --generation-kwargs + - 生成的参数 + - do_sample=True top_k=50 top_p=0.95 + * - --max-seq-len + - 模型可以接受的最大序列长度 + - 2048 + * - --max-out-len + - 生成的最大 token 数 + - 100 + * - --min-out-len + - 生成的最小 token 数 + - 1 + * - --batch-size + - 批量大小 + - 64 + * - --hf-num-gpus + - 运行一个模型实例所需的 GPU 数量 + - 1 + * - --stop-words + - 停用词列表 + - ‘<|im_end|>’ ‘<|im_start|>’ + * - --pad-token-id + - 填充 token 的 ID + - 0 + * - --peft-path + - (例如) LoRA 模型的路径 + - internlm/internlm2-chat-1_8b + * - --peft-kwargs + - (例如) 构建 LoRA 模型的参数 + - trust_remote_code=True + + + +启动评估 +~~~~~~~~~~~~~~~ + +由于 OpenCompass 默认并行启动评估过程,我们可以在第一次运行时以 ``--debug`` 模式启动评估,并检查是否存在问题。包括在前述的所有文档中,我们都使用了 ``--debug`` 开关。在 ``--debug`` 模式下,任务将按顺序执行,并实时打印输出。 + +.. code-block:: shell + :linenos: + + # train on multi-npu + python run.py configs/eval_chat_demo.py -w outputs/demo --debug + + +对话默写 ‘internlm/internlm2-chat-1_8b’ 和 ‘Qwen/Qwen2-1.5B-Instruct’ 将在首次运行期间从 HuggingFace 自动下载。 如果一切正常,您应该看到屏幕上显示 “Starting inference process”,且进度条开始前进: + +.. code-block:: shell + :linenos: + + # train on multi-npu + [2023-07-12 18:23:55,076] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process... + +然后,您可以按 Ctrl+C 中断程序,并以正常模式运行以下命令: + +.. code-block:: shell + :linenos: + + # train on multi-npu + python run.py configs/eval_chat_demo.py -w outputs/demo + +在正常模式下,评估任务将在后台并行执行,其输出将被重定向到输出目录 ``outputs/demo/{TIMESTAMP}``。前端的进度条只指示已完成任务的数量,而不考虑其成功或失败。任何后端任务失败都只会在终端触发警告消息。 + +可视化评估结果 +--------------------- + +评估完成后,评估结果表格将打印如下: + +.. code-block:: shell + :linenos: + + dataset version metric mode qwen2-1.5b-instruct-hf internlm2-chat-1.8b-hf + ---------- --------- -------- ------ ------------------------ ------------------------ + demo_gsm8k 1d7fe4 accuracy gen 56.25 32.81 + demo_math 393424 accuracy gen 18.75 14.06 + + +所有运行输出将定向到 ``outputs/demo/`` 目录,结构如下: + +.. code-block:: shell + :linenos: + + outputs/default/ + ├── 20200220_120000 + ├── 20230220_183030 # 每个实验一个文件夹 + │ ├── configs # 用于记录的已转储的配置文件。如果在同一个实验文件夹中重新运行了不同的实验,可能会保留多个配置 + │ ├── logs # 推理和评估阶段的日志文件 + │ │ ├── eval + │ │ └── infer + │ ├── predictions # 每个任务的推理结果 + │ ├── results # 每个任务的评估结果 + │ └── summary # 单个实验的汇总评估结果 + ├── ... diff --git a/_sources/sources/opencv/index.rst.txt b/_sources/sources/opencv/index.rst.txt new file mode 100644 index 0000000..b8a63e5 --- /dev/null +++ b/_sources/sources/opencv/index.rst.txt @@ -0,0 +1,8 @@ +OpenCV +=========== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/_sources/sources/opencv/install.rst.txt b/_sources/sources/opencv/install.rst.txt new file mode 100644 index 0000000..022b237 --- /dev/null +++ b/_sources/sources/opencv/install.rst.txt @@ -0,0 +1,109 @@ +安装指南 +============== + +OpenCV 4.9.0 版本开始,增加了图像处理相关高频接口的昇腾原生支持,本教程面向使用 OpenCV & 昇腾开发者,帮助完成昇腾环境下 OpenCV 的安装。 + +昇腾环境安装 +------------ + +请根据已有昇腾产品型号及CPU架构等按照 :doc:`快速安装昇腾环境指引 <../ascend/quick_install>` 进行昇腾环境安装,或直接获取对应产品的昇腾环境镜像 `ascendai/cann `_ 。 + +.. 
warning:: + CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。 + +OpenCV 安装 +---------------------- + + +请遵循以下版本控制: + +======= ========== ========== + lib 最低版本 推荐版本 +======= ========== ========== +OpenCV 4.9.0 latest +Python 3.9 3.10 +GCC 9.4.0 9.4.0 +======= ========== ========== + +Python 环境创建 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: shell + :linenos: + + # 创建名为 opencv 的 python 3.10 的虚拟环境 + conda create -y -n opencv python=3.10 + # 激活虚拟环境 + conda activate opencv + + +源码编译 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. 下载 OpenCV 和 opencv_contrib + +.. code-block:: shell + :linenos: + + git clone https://github.com/opencv/opencv.git + + cd opencv + git clone https://github.com/opencv/opencv_contrib.git + +2. 编译带有 opencv_contrib 的 OpenCV + +.. TODO: check for the simplest cmake config +.. code-block:: shell + :linenos: + + # 在 opencv 项目目录中创建并进入 build 目录 + mkdir build + cd build + + # cmake & make + cmake -D CMAKE_BUILD_TYPE=RELEASE + -D CMAKE_INSTALL_PREFIX=pwd/install \ + -D WITH_DEBUG=0 \ + -D OPENCV_EXTRA_MODULES_PATH=/path/to/opencv/opencv_contrib/modules \ + -D DWITH_CUDA=0 \ + -D DWITH_CANN=1 \ + -D DPYTHON3_EXECUTABLE=/path/to/miniconda3/envs/opencv/bin/python \ + -D DPYTHON_LIBRARY=/path/to/miniconda3/envs/opencv \ + -D PYTHON_INCLUDE_DIR=/path/to/miniconda3/envs/opencv/include/python3.10 \ + -D BUILD_opencv_wechat_qrcode=OFF \ + -D BUILD_opencv_xfeatures2d=OFF \ + -D BUILD_opencv_face=OFF \ + -D BUILD_opencv_dnn=OFF \ + -D BUILD_opencv_features2d=OFF \ + -D WITH_CAROTENE=OFF \ + -D WITH_IPP=OFF \ + -D BUILD_DOCS=ON \ + -D BUILD_EXAMPLES=ON .. + + make -j5 + +当编译出现以下关键回显信息时,说明编译成功。 + +.. code-block:: shell + + # xxx 为 OpenCV 中某模块名称 + [100%] Built target xxx + +安装校验 +---------------------- + +通过以下指令执行昇腾算子单元测试: + +.. code-block:: shell + :linenos: + + cd path/to/opencv/build/bin + ./opencv_test_cannops + +出现以下关键回显说明安装成功: + +.. code-block:: shell + + [==========] 72 tests from 4 test cases ran. (40937 ms total) + [ PASSED ] 72 tests. + diff --git a/_sources/sources/opencv/quick_start.rst.txt b/_sources/sources/opencv/quick_start.rst.txt new file mode 100644 index 0000000..d83345b --- /dev/null +++ b/_sources/sources/opencv/quick_start.rst.txt @@ -0,0 +1,144 @@ +快速开始 +================== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 OpenCV ! + +OpenCV 中昇腾算子入参列表和 cpu 及 cuda 算子保持一致,除了对昇腾必要的初始化、去初始化之外,用户无需学习 CANN API,仅需要将原来的接口添加 cann 包名(C++ 接口为使用 cann 命名空间),整体流程如下图所示: + +.. figure:: ./images/opencv_cannop.png + :align: center + :scale: 70% + + + +图像处理 +------------- +OpenCV 当前支持 20+ 昇腾算子,此处根据图像处理应用场景,选取 ``add``, ``rotate`` 和 ``flip`` 算子的应用作示例代码, +更多算子见 `OpenCV 官方文档 `_。 + +使用 C++ +~~~~~~~~~~~~~ + +.. note:: + + 通过命令行传参 ``input`` 和 ``output`` 来指定输入和输出图像路径 + +.. code-block:: c++ + :linenos: + :emphasize-lines: 34,35,39,41,43,48,49 + + // This file is part of OpenCV project. + // It is subject to the license terms in the LICENSE file found in the top-level directory + // of this distribution and at http://opencv.org/license.html. + + #include + #include + #include + #include + + int main(int argc, char* argv[]) + { + cv::CommandLineParser parser(argc, argv, + "{@input|puppy.png|path to input image}" + "{@output|output.png|path to output image}" + "{help||show help}"); + parser.about("This is a sample for image processing with Ascend NPU. 
\n"); + if (argc != 3 || parser.has("help")) + { + parser.printMessage(); + return 0; + } + + std::string imagePath = parser.get(0); + std::string outputPath = parser.get(1); + + // 读取输入图像 + cv::Mat img = cv::imread(imagePath); + // 生成高斯噪声 + cv::Mat gaussNoise(img.rows, img.cols, img.type()); + cv::RNG rng; + rng.fill(gaussNoise, cv::RNG::NORMAL, 0, 25); + + // cann 初始化及指定设备 + cv::cann::initAcl(); + cv::cann::setDevice(0); + + cv::Mat output; + // 添加高斯噪声到输入图像 + cv::cann::add(img, gaussNoise, output); + // 旋转图像 (0, 1, 2, 分别代表旋转 90°, 180°, 270°) + cv::cann::rotate(output, output, 0); + // 翻转图像 (0, 正数, 负数, 分别代表沿 x, y, x 和 y 轴进行翻转) + cv::cann::flip(output, output, 0); + // 写入输出图像 + cv::imwrite(outputPath, output); + + // cann 去初始化 + cv::cann::resetDevice(); + cv::cann::finalizeAcl(); + return 0; + } + +使用 Python +~~~~~~~~~~~~~ + +.. note:: + + 通过命令行传参 ``input`` 和 ``output`` 来指定输入和输出图像路径 + +.. code-block:: python + :linenos: + :emphasize-lines: 20,21,24,26,28,33 + + # This file is part of OpenCV project. + # It is subject to the license terms in the LICENSE file found in the top-level directory + # of this distribution and at http://opencv.org/license.html. + + import numpy as np + import cv2 + import argparse + + parser = argparse.ArgumentParser(description='This is a sample for image processing with Ascend NPU.') + parser.add_argument('image', help='path to input image') + parser.add_argument('output', help='path to output image') + args = parser.parse_args() + + # 读取输入图像 + img = cv2.imread(args.image) + # 生成高斯噪声 + gaussNoise = np.random.normal(0, 25,(img.shape[0], img.shape[1], img.shape[2])).astype(img.dtype) + + # cann 初始化及指定设备 + cv2.cann.initAcl() + cv2.cann.setDevice(0) + + # 添加高斯噪声到输入图像 + output = cv2.cann.add(img, gaussNoise) + # 旋转图像 (0, 1, 2, 分别代表旋转 90°, 180°, 270°) + output = cv2.cann.rotate(output, 0) + # 翻转图像 (0, 正数, 负数, 分别代表沿 x, y, x 和 y 轴进行翻转) + output = cv2.cann.flip(output, 0) + # 写入输出图像 + cv2.imwrite(args.output, output) + + # cann 去初始化 + cv2.cann.finalizeAcl() + + +图像处理结果 +~~~~~~~~~~~~~~~~~ + +本示例使用输入图像如图所示: + +.. figure:: ./images/input.png + :align: center + :scale: 50% + +通过上述 Python 或 C++ 示例代码处理,得到的输出图像为: + +.. figure:: ./images/result.png + :align: center + :scale: 50% + diff --git a/_sources/sources/pytorch/api_doc.rst.txt b/_sources/sources/pytorch/api_doc.rst.txt new file mode 100644 index 0000000..7f66001 --- /dev/null +++ b/_sources/sources/pytorch/api_doc.rst.txt @@ -0,0 +1,518 @@ +API说明 +========== + +PyTorch-NPU 除了提供了 PyTorch 官方算子实现之外,也提供了大量高性能的自定义算子,详细的算子信息以及描述如下所示: + +.. note:: + + 在运行下述示例之前,需要导入torch_npu扩展包 ``import torch_npu`` + +.. py:function:: _npu_dropout(self, p) -> (Tensor, Tensor) + :module: torch_npu + + 不使用种子(seed)进行dropout结果计数,与torch.dropout相似,优化NPU设备实现 + + :param Tensor self: 输入张量 + :param Float p: 丢弃概率 + + :rtype: (Tensor, Tensor) + +示例: + +.. code-block:: python + :linenos: + + >>> input = torch.tensor([1.,2.,3.,4.]).npu() + >>> input + tensor([1., 2., 3., 4.], device='npu:0') + >>> prob = 0.3 + >>> output, mask = torch_npu._npu_dropout(input, prob) + >>> output + tensor([0.0000, 2.8571, 0.0000, 0.0000], device='npu:0') + >>> mask + tensor([ 98, 255, 188, 186, 120, 157, 175, 159, 77, 223, 127, 79, 247, 151, + 253, 255], device='npu:0', dtype=torch.uint8) + +.. 
py:function:: copy_memory_(dst, src, non_blocking=False) -> Tensor + :module: torch_npu + + 从src拷贝元素到self张量,并返回self + + 约束说明: + + copy_memory_仅支持NPU张量,copy_memory_的输入张量应具有相同的dtype和设备index + + :param Tensor dst: 拷贝源张量 + :param Tensor sr: 返回张量所需数据类型 + :param non_blocking: 如果设置为True且此拷贝位于CPU和NPU之间,则拷贝可能相对于主机异步发生,在其他情况下,此参数没有效果 + :type non_blocking: Bool,Default: ``False`` + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> a=torch.IntTensor([0, 0, -1]).npu() + >>> b=torch.IntTensor([1, 1, 1]).npu() + >>> a.copy_memory_(b) + tensor([1, 1, 1], device='npu:0', dtype=torch.int32) + +.. py:function:: empty_with_format(size, dtype, layout, device, pin_memory, acl_format) + :module: torch_npu + + 返回一个填充未初始化数据的张量 + + :param ListInt size: 定义输出张量shape的整数序列,可以是参数数量(可变值),也可以是列表或元组等集合 + :param dtype: 返回张量所需数据类型;如果值为None,请使用全局默认值(请参见torch.set_default_tensor_type()). + :type dtype: torch.dtype,Default: ``None`` + :param layout: 返回张量所需布局 + :type layout: torch.layout, Default: ``torch.strided`` + :param device: 返回张量的所需设备 + :type device: torch.device, Default: ``None`` + :param pin_memory: 返回张量的所需设备 + :type pin_memory: Bool, Default: ``False`` + :param acl_format: 返回张量所需内存格式 + :type acl_format: Int, Default: ``2`` + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> torch_npu.empty_with_format((2, 3), dtype=torch.float32, device="npu") + tensor([[1., 1., 1.], + [1., 1., 1.]], device='npu:0') + +.. py:function:: fast_gelu(self) -> Tensor + :module: torch_npu + + gelu的npu实现,支持FakeTensor模式 + + :param Tensor self: 输入张量(只float16、float32) + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + # Normal + >>> x = torch.rand(2).npu() + >>> x + tensor([0.5991, 0.4094], device='npu:0') + >>> torch_npu.fast_gelu(x) + tensor([0.4403, 0.2733], device='npu:0') + + # FakeTensorMode + >>> from torch._subclasses.fake_tensor import FakeTensorMode + >>> with FakeTensorMode(): + ... x = torch.rand(2).npu() + ... torch_npu.fast_gelu(x) + >>> FakeTensor(..., device='npu:0', size=(2,)) + +.. py:function:: npu_alloc_float_status(self) -> Tensor + :module: torch_npu + + 生成一个包含8个0的一维张量 + + :param Tensor self: 输入张量 + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> input = torch.randn([1,2,3]).npu() + >>> output = torch_npu.npu_alloc_float_status(input) + >>> input + tensor([[[ 2.2324, 0.2478, -0.1056], + [ 1.1273, -0.2573, 1.0558]]], device='npu:0') + >>> output + tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='npu:0') + +.. py:function:: npu_anchor_response_flags(self, featmap_size, stride, num_base_anchors) -> Tensor + :module: torch_npu + + 在单个特征图中生成锚点的责任标志 + + :param Tensor self: 真值框,shape为[batch, 4]的2D张量 + :param ListInt[2] featmap_size: 特征图大小 + :param ListInt[2] strides: 当前水平的步长 + :param Int num_base_anchors: base anchors的数量 + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> x = torch.rand(100, 4).npu() + >>> y = torch_npu.npu_anchor_response_flags(x, [60, 60], [2, 2], 9) + >>> y.shape + torch.Size([32400]) + +.. 
py:function:: npu_apply_adam(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, use_locking, use_nesterov, out = (var, m, v)) + :module: torch_npu + + adam结果计数。 + + :param Scalar beta1_power: beta1的幂 + :param Scalar beta2_power: beta2的幂 + :param Scalar lr: 学习率 + :param Scalar beta1: 一阶矩估计值的指数衰减率 + :param Scalar beta2: 二阶矩估计值的指数衰减率 + :param Scalar epsilon: 添加到分母中以提高数值稳定性的项数 + :param Tensor grad: 梯度 + :param Bool use_locking: 设置为True时使用lock进行更新操作 + :param Bool use_nesterov: 设置为True时采用nesterov更新 + :param Tensor var: 待优化变量。 + :param Tensor m: 变量平均值。 + :param Tensor v: 变量方差。 + +.. py:function:: npu_batch_nms(self, scores, score_threshold, iou_threshold, max_size_per_class, max_total_size, change_coordinate_frame=False, transpose_box=False) -> (Tensor, Tensor, Tensor, Tensor) + + :module: torch_npu + + 根据batch分类计算输入框评分,通过评分排序,删除评分高于阈值(iou_threshold)的框,支持多批多类处理。通过NonMaxSuppression(nms)操作可有效删除冗余的输入框,提高检测精度。NonMaxSuppression:抑制不是极大值的元素,搜索局部的极大值,常用于计算机视觉任务中的检测类模型。 + + :param Tensor self: 必填值,输入框的tensor,包含batch大小,数据类型Float16,输入示例:[batch_size, num_anchors, q, 4],其中q=1或q=num_classes + :param Tensor scores: 必填值,输入tensor,数据类型Float16,输入示例:[batch_size, num_anchors, num_classes] + :param Float32 score_threshold: 必填值,指定评分过滤器的iou_threshold,用于筛选框,去除得分较低的框,数据类型Float32 + :param Float32 iou_threshold: 必填值,指定nms的iou_threshold,用于设定阈值,去除高于阈值的的框,数据类型Float32 + :param Int max_size_per_class: 必填值,指定每个类别的最大可选的框数,数据类型Int + :param Int max_total_size: 必填值,指定每个batch最大可选的框数,数据类型Int + :param Bool change_coordinate_frame: 可选值, 是否正则化输出框坐标矩阵,数据类型Bool(默认False) + :param Bool transpose_box: 可选值,确定是否在此op之前插入转置,数据类型Bool。True表示boxes使用4,N排布。 False表示boxes使用过N,4排布 + + 输出说明: + :param Tensor nmsed_boxes: shape为(batch, max_total_size, 4)的3D张量,指定每批次输出的nms框,数据类型Float16 + :param Tensor nmsed_scores: shape为(batch, max_total_size)的2D张量,指定每批次输出的nms分数,数据类型Float16 + :param Tensor nmsed_classes: shape为(batch, max_total_size)的2D张量,指定每批次输出的nms类,数据类型Float16 + :param Tensor nmsed_num: shape为(batch)的1D张量,指定nmsed_boxes的有效数量,数据类型Int32 + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> boxes = torch.randn(8, 2, 4, 4, dtype = torch.float32).to("npu") + >>> scores = torch.randn(3, 2, 4, dtype = torch.float32).to("npu") + >>> nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = torch_npu.npu_batch_nms(boxes, scores, 0.3, 0.5, 3, 4) + >>> nmsed_boxes + >>> nmsed_scores + >>> nmsed_classes + >>> nmsed_num + +.. py:function:: npu_bert_apply_adam(lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay, step_size=None, adam_mode=0, *, out=(var,m,v)) + + :module: torch_npu + + adam结果计数 + + :param Tensor var: float16或float32类型张量 + :param Tensor m: 数据类型和shape与exp_avg相同 + :param Tensor v: 数据类型和shape与exp_avg相同 + :param Scalar lr: 数据类型与exp_avg相同 + :param Scalar beta1: 数据类型与exp_avg相同 + :param Scalar beta2: 数据类型与exp_avg相同 + :param Scalar epsilon: 数据类型与exp_avg相同 + :param Tensor grad: 数据类型和shape与exp_avg相同 + :param Scalar max_grad_norm: 数据类型与exp_avg相同 + :param Scalar global_grad_norm: 数据类型与exp_avg相同 + :param Scalar weight_decay: 数据类型与exp_avg相同 + :param Tensor step_size: 默认值为None - shape为(1, ),数据类型与exp_avg一致 + :param Int adam_mode: 选择adam模式。0表示“adam”, 1表示“mbert_adam”, 默认值为0 + + 关键字参数: + out (Tensor,可选) - 输出张量。 + +示例: + +.. code-block:: python + :linenos: + + >>> var_in = torch.rand(321538).uniform_(-32., 21.).npu() + >>> m_in = torch.zeros(321538).npu() + >>> v_in = torch.zeros(321538).npu() + >>> grad = torch.rand(321538).uniform_(-0.05, 0.03).npu() + >>> max_grad_norm = -1. 
+ >>> beta1 = 0.9 + >>> beta2 = 0.99 + >>> weight_decay = 0. + >>> lr = 0. + >>> epsilon = 1e-06 + >>> global_grad_norm = 0. + >>> var_out, m_out, v_out = torch_npu.npu_bert_apply_adam(lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay, out=(var_in, m_in, v_in)) + >>> var_out + tensor([ 14.7733, -30.1218, -1.3647, ..., -16.6840, 7.1518, 8.4872], device='npu:0') + +.. py:function:: npu_bmmV2(self, mat2, output_sizes) -> Tensor + :module: torch_npu + + 将矩阵“a”乘以矩阵“b”,生成“a*b”。支持FakeTensor模式 + + :param Tensor self: 2D或更高维度矩阵张量。数据类型:float16、float32、int32。格式:[ND, NHWC, FRACTAL_NZ] + :param Tensor mat2: 2D或更高维度矩阵张量。数据类型:float16、float32、int32。格式:[ND, NHWC, FRACTAL_NZ] + :param ListInt[] output_sizes: 输出的shape,用于matmul的反向传播 + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> mat1 = torch.randn(10, 3, 4).npu() + >>> mat2 = torch.randn(10, 4, 5).npu() + >>> res = torch_npu.npu_bmmV2(mat1, mat2, []) + >>> res.shape + torch.Size([10, 3, 5]) + +.. py:function:: npu_bounding_box_decode(rois, deltas, means0, means1, means2, means3, stds0, stds1, stds2, stds3, max_shape, wh_ratio_clip) -> Tensor + :module: torch_npu + + 根据rois和deltas生成标注框。自定义FasterRcnn算子 + + :param Tensor rois: 区域候选网络(RPN)生成的region of interests(ROI)。shape为(N,4)数据类型为float32或float16的2D张量。“N”表示ROI的数量, “4”表示“x0”、“x1”、“y0”和“y1” + :param Tensor deltas: RPN生成的ROI和真值框之间的绝对变化。shape为(N,4)数据类型为float32或float16的2D张量。“N”表示错误数,“4”表示“dx”、“dy”、“dw”和“dh” + :param Float means0: index + :param Float means1: index + :param Float means2: index + :param Float means33: index, 默认值为[0,0,0,0], "deltas" = "deltas" x "stds" + "means" + :param Float stds0: index + :param Float stds1: index + :param Float stds2: index + :param Float stds3: index, 默认值:[1.0,1.0,1.0,1.0], deltas" = "deltas" x "stds" + "means" + :param ListInt[2] max_shape: shape[h, w], 指定传输到网络的图像大小。用于确保转换后的bbox shape不超过“max_shape” + :param Float wh_ratio_clip: 当前水平的步长 + :param Int num_base_anchors: “dw”和“dh”的值在(-wh_ratio_clip, wh_ratio_clip)范围内 + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> rois = torch.tensor([[1., 2., 3., 4.], [3.,4., 5., 6.]], dtype = torch.float32).to("npu") + >>> deltas = torch.tensor([[5., 6., 7., 8.], [7.,8., 9., 6.]], dtype = torch.float32).to("npu") + >>> output = torch_npu.npu_bounding_box_decode(rois, deltas, 0, 0, 0, 0, 1, 1, 1, 1, (10, 10), 0.1) + >>> output + tensor([[2.5000, 6.5000, 9.0000, 9.0000], + [9.0000, 9.0000, 9.0000, 9.0000]], device='npu:0') + +.. py:function:: npu_broadcast(self, size) -> Tensor + :module: torch_npu + + 返回self张量的新视图,其单维度扩展,结果连续。 + + :param Tensor self: 输入张量。 + :param ListInt size: 对应扩展尺寸。 + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> x = torch.tensor([[1], [2], [3]]).npu() + >>> x.shape + torch.Size([3, 1]) + >>> x.npu_broadcast(3, 4) + tensor([[1, 1, 1, 1], + [2, 2, 2, 2], + [3, 3, 3, 3]], device='npu:0') + +.. py:function:: npu_ciou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=True, int mode=0, bool atan_sub_flag=False) -> Tensor + :module: torch_npu + + 应用基于NPU的CIoU操作。在DIoU的基础上增加了penalty item,并propose CIoU。 + + :param Tensor boxes1: 格式为xywh、shape为(4, n)的预测检测框。 + :param Tensor boxes2: 相应的gt检测框,shape为(4, n)。 + :param Bool trans: 是否有偏移。 + :param Bool is_cross: box1和box2之间是否有交叉操作。 + :param Int mode: 选择CIoU的计算方式。0表示IoU,1表示IoF。 + :param Bool atan_sub_flag:是否将正向的第二个值传递给反向。 + + :rtype: Tensor + + 约束说明: + 到目前为止,CIoU向后只支持当前版本中的trans==True、is_cross==False、mode==0('iou')。如果需要反向传播,确保参数正确。 + +示例: + +.. 
code-block:: python + :linenos: + + >>> box1 = torch.randn(4, 32).npu() + >>> box1.requires_grad = True + >>> box2 = torch.randn(4, 32).npu() + >>> box2.requires_grad = True + >>> diou = torch_npu.contrib.function.npu_ciou(box1, box2) + >>> l = ciou.sum() + >>> l.backward() + +.. py:function:: npu_clear_float_status(self) -> Tensor + :module: torch_npu + + 在每个核中设置地址0x40000的值为0。 + + :param Tensor self: 数据类型为float32的张量。 + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> x = torch.rand(2).npu() + >>> torch_npu.npu_clear_float_status(x) + tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='npu:0') + +.. py:function:: npu_confusion_transpose(self, perm, shape, transpose_first) -> Tensor + :module: torch_npu + + 混淆reshape和transpose运算。 + + :param Tensor self: 数据类型:float16、float32、int8、int16、int32、int64、uint8、uint16、uint32、uint64。 + :param ListInt perm: self张量的维度排列。 + :param ListInt shape: 输入shape。 + :param Bool transpose_first: 如果值为True,首先执行transpose,否则先执行reshape。 + + :rtype: Tensor + +示例: + +.. code-block:: python + :linenos: + + >>> x = torch.rand(2, 3, 4, 6).npu() + >>> x.shape + torch.Size([2, 3, 4, 6]) + >>> y = torch_npu.npu_confusion_transpose(x, (0, 2, 1, 3), (2, 4, 18), True) + >>> y.shape + torch.Size([2, 4, 18]) + >>> y2 = torch_npu.npu_confusion_transpose(x, (0, 2, 1), (2, 12, 6), False) + >>> y2.shape + torch.Size([2, 6, 12]) + +.. py:function:: npu_conv2d(input, weight, bias, stride, padding, dilation, groups) -> Tensor + :module: torch_npu + + 在由多个输入平面组成的输入图像上应用一个2D卷积。 + + :param Tensor input: shape的输入张量,值为 (minibatch, in_channels, iH, iW)。 + :param Tensor weight: shape过滤器,值为 (out_channels, in_channels/groups, kH, kW)。 + :param Tensor bias: shape偏差 (out_channels)。 + :param ListInt stride: 卷积核步长。 + :param ListInt padding: 输入两侧的隐式填充。 + :param ListInt dilation: 内核元素间距。 + :param Int groups: 对输入进行分组。In_channels可被组数整除。 + + :rtype: Tensor + +.. py:function:: npu_conv3d(input, weight, bias, stride, padding, dilation, groups) -> Tensor + :module: torch_npu + + 在由多个输入平面组成的输入图像上应用一个3D卷积。 + + :param Tensor input: shape的输入张量,值为 (minibatch, in_channels, iT, iH, iW)。 + :param Tensor weight: shape过滤器,值为 (out_channels, in_channels/groups, kT, kH, kW)。 + :param Tensor bias: shape偏差 (out_channels)。 + :param ListInt stride: 卷积核步长。 + :param ListInt padding: 输入两侧的隐式填充。 + :param ListInt dilation: 内核元素间距。 + :param Int groups: 对输入进行分组。In_channels可被组数整除。 + + :rtype: Tensor + +.. py:function:: npu_conv_transpose2d(input, weight, bias, padding, output_padding, stride, dilation, groups) -> Tensor + :module: torch_npu + + 在由多个输入平面组成的输入图像上应用一个2D转置卷积算子,有时这个过程也被称为“反卷积”。 + + :param Tensor input: shape的输入张量,值为 (minibatch, in_channels, iH, iW)。 + :param Tensor weight: shape过滤器,值为 (in_channels, out_channels/groups, kH, kW)。 + :param Tensor bias: shape偏差 (out_channels)。 + :param ListInt padding: (dilation * (kernel_size - 1) - padding) 用零来填充输入每个维度的两侧。 + :param ListInt output_padding: 添加到输出shape每个维度一侧的附加尺寸。 + :param ListInt stride: 卷积核步长。 + :param ListInt dilation: 内核元素间距。 + :param Int groups: 对输入进行分组。In_channels可被组数整除。 + + :rtype: Tensor + +.. 
py:function:: npu_convolution(input, weight, bias, stride, padding, dilation, groups) -> Tensor + :module: torch_npu + + 在由多个输入平面组成的输入图像上应用一个2D或3D卷积。 + + :param Tensor input: shape的输入张量,值为 (minibatch, in_channels, iH, iW) 或 (minibatch, in_channels, iT, iH, iW)。 + :param Tensor weight: shape过滤器,值为 (out_channels, in_channels/groups, kH, kW) 或 (out_channels, in_channels/groups, kT, kH, kW)。 + :param Tensor bias: shape偏差 (out_channels)。 + :param ListInt stride: 卷积核步长。 + :param ListInt padding: 输入两侧的隐式填充。 + :param ListInt dilation: 内核元素间距。 + :param Int groups: 对输入进行分组。In_channels可被组数整除。 + + :rtype: Tensor + +.. py:function:: npu_convolution_transpose(input, weight, bias, padding, output_padding, stride, dilation, groups) -> Tensor + :module: torch_npu + + 在由多个输入平面组成的输入图像上应用一个2D或3D转置卷积算子,有时这个过程也被称为“反卷积”。 + + :param Tensor input: shape的输入张量,值为 (minibatch, in_channels, iH, iW) 或 (minibatch, in_channels, iT, iH, iW)。 + :param Tensor weight: shape过滤器,值为 (in_channels, out_channels/groups, kH, kW) 或 (in_channels, out_channels/groups, kT, kH, kW)。 + :param Tensor bias: shape偏差 (out_channels)。 + :param ListInt padding: (dilation * (kernel_size - 1) - padding) 用零来填充输入每个维度的两侧。 + :param ListInt output_padding: 添加到输出shape每个维度一侧的附加尺寸。 + :param ListInt stride: 卷积核步长。 + :param ListInt dilation: 内核元素间距。 + :param Int groups: 对输入进行分组。In_channels可被组数整除。 + + :rtype: Tensor + +.. py:function:: npu_deformable_conv2d(self, weight, offset, bias, kernel_size, stride, padding, dilation=[1,1,1,1], groups=1, deformable_groups=1, modulated=True) -> (Tensor, Tensor) + :module: torch_npu + + 使用预期输入计算变形卷积输出(deformed convolution output)。 + + :param Tensor self: 输入图像的4D张量。格式为“NHWC”,数据按以下顺序存储:[batch, in_height, in_width, in_channels]。 + :param Tensor weight: 可学习过滤器的4D张量。数据类型需与self相同。格式为“HWCN”,数据按以下顺序存储:[filter_height, filter_width, in_channels / groups, out_channels]。 + :param Tensor offset: x-y坐标偏移和掩码的4D张量。格式为“NHWC”,数据按以下顺序存储:[batch, out_height, out_width, deformable_groups * filter_height * filter_width * 3]。bias (Tensor,可选) - 过滤器输出附加偏置(additive bias)的1D张量,数据按[out_channels]的顺序存储。 + :param ListInt[2] kernel_size: 内核大小,2个整数的元组/列表。 + :param ListInt stride: 4个整数的列表,表示每个输入维度的滑动窗口步长。维度顺序根据self的数据格式解释。N维和C维必须设置为1。 + :param ListInt padding: 4个整数的列表,表示要添加到输入每侧(顶部、底部、左侧、右侧)的像素数。 + + :param ListInt dilation: 4个整数的列表,表示输入每个维度的膨胀系数(dilation factor)。维度顺序根据self的数据格式解释。N维和C维必须设置为1。 + :param Int groups: int32类型单整数,表示从输入通道到输出通道的阻塞连接数。In_channels和out_channels需都可被“groups”数整除。 + :param Int deformable_groups: int32类型单整数,表示可变形组分区的数量。In_channels需可被“deformable_groups”数整除。 + :param Bool transpose_first: 默认值为True, 指定DeformableConv2D版本。True表示v2版本, False表示v1版本,目前仅支持v2。 + + :rtype: (Tensor, Tensor) + +示例: + +.. code-block:: python + :linenos: + + >>> x = torch.rand(16, 32, 32, 32).npu() + >>> weight = torch.rand(32, 32, 5, 5).npu() + >>> offset = torch.rand(16, 75, 32, 32).npu() + >>> output, _ = torch_npu.npu_deformable_conv2d(x, weight, offset, None, kernel_size=[5, 5], stride = [1, 1, 1, 1], padding = [2, 2, 2, 2]) + >>> output.shape + torch.Size([16, 32, 32, 32]) \ No newline at end of file diff --git a/_sources/sources/pytorch/examples.rst.txt b/_sources/sources/pytorch/examples.rst.txt new file mode 100644 index 0000000..1fa58d1 --- /dev/null +++ b/_sources/sources/pytorch/examples.rst.txt @@ -0,0 +1,155 @@ +功能样例 +========== + +这些示例将会帮助您快速了解如何在Ascend NPU上使用PyTorch的相关特性。 + +.. note:: + + 在运行下述示例之前,需要您已经安装了PyTorch-NPU环境,有关环境安装,请参考 :doc:`./install` + +1. 
.. py:function:: npu_deformable_conv2d(self, weight, offset, bias, kernel_size, stride, padding, dilation=[1,1,1,1], groups=1, deformable_groups=1, modulated=True) -> (Tensor, Tensor)
    :module: torch_npu

    使用预期输入计算变形卷积输出(deformed convolution output)。

    :param Tensor self: 输入图像的4D张量。格式为“NHWC”,数据按以下顺序存储:[batch, in_height, in_width, in_channels]。
    :param Tensor weight: 可学习过滤器的4D张量。数据类型需与self相同。格式为“HWCN”,数据按以下顺序存储:[filter_height, filter_width, in_channels / groups, out_channels]。
    :param Tensor offset: x-y坐标偏移和掩码的4D张量。格式为“NHWC”,数据按以下顺序存储:[batch, out_height, out_width, deformable_groups * filter_height * filter_width * 3]。
    :param Tensor bias: 可选。过滤器输出附加偏置(additive bias)的1D张量,数据按[out_channels]的顺序存储。
    :param ListInt[2] kernel_size: 内核大小,2个整数的元组/列表。
    :param ListInt stride: 4个整数的列表,表示每个输入维度的滑动窗口步长。维度顺序根据self的数据格式解释。N维和C维必须设置为1。
    :param ListInt padding: 4个整数的列表,表示要添加到输入每侧(顶部、底部、左侧、右侧)的像素数。
    :param ListInt dilation: 4个整数的列表,表示输入每个维度的膨胀系数(dilation factor)。维度顺序根据self的数据格式解释。N维和C维必须设置为1。
    :param Int groups: int32类型单整数,表示从输入通道到输出通道的阻塞连接数。in_channels和out_channels需都可被“groups”数整除。
    :param Int deformable_groups: int32类型单整数,表示可变形组分区的数量。in_channels需可被“deformable_groups”数整除。
    :param Bool modulated: 默认值为True,指定DeformableConv2D版本。True表示v2版本,False表示v1版本,目前仅支持v2。

    :rtype: (Tensor, Tensor)

示例:

.. code-block:: python
    :linenos:

    >>> x = torch.rand(16, 32, 32, 32).npu()
    >>> weight = torch.rand(32, 32, 5, 5).npu()
    >>> offset = torch.rand(16, 75, 32, 32).npu()
    >>> output, _ = torch_npu.npu_deformable_conv2d(x, weight, offset, None, kernel_size=[5, 5], stride=[1, 1, 1, 1], padding=[2, 2, 2, 2])
    >>> output.shape
    torch.Size([16, 32, 32, 32])
\ No newline at end of file
diff --git a/_sources/sources/pytorch/examples.rst.txt b/_sources/sources/pytorch/examples.rst.txt
new file mode 100644
index 0000000..1fa58d1
--- /dev/null
+++ b/_sources/sources/pytorch/examples.rst.txt
@@ -0,0 +1,155 @@
功能样例
==========

这些示例将会帮助您快速了解如何在Ascend NPU上使用PyTorch的相关特性。

.. note::

    在运行下述示例之前,需要您已经安装了PyTorch-NPU环境,有关环境安装,请参考 :doc:`./install`

1. 数据并行
-----------------------

PyTorch的数据并行主要分为以下几种:DP、DDP以及FSDP(HSDP变种),接下来将简单描述在Ascend NPU场景下如何实现上述数据并行。

1.1 DDP
^^^^^^^^^^

.. code-block:: python
    :linenos:
    :emphasize-lines: 11,12,31,35,39,47,49

    # encoding: UTF-8

    import os
    import torch
    import torch.distributed as dist
    import torch.multiprocessing as mp
    import torch.nn as nn
    import torch.optim as optim
    from torch.nn.parallel import DistributedDataParallel as DDP

    # 引入torch-npu包
    import torch_npu


    class ToyModel(nn.Module):
        def __init__(self):
            super(ToyModel, self).__init__()
            self.net1 = nn.Linear(10, 10)
            self.relu = nn.ReLU()
            self.net2 = nn.Linear(10, 5)

        def forward(self, x):
            return self.net2(self.relu(self.net1(x)))


    def setup(rank, world_size):
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = "29500"

        # initialize the process group
        dist.init_process_group("hccl", rank=rank, world_size=world_size)


    def example(rank, world_size):
        device = torch.device("npu:{}".format(rank))
        # create default process group
        setup(rank, world_size)
        # create local model
        model = ToyModel().to(device)
        # construct DDP model
        ddp_model = DDP(model, device_ids=[rank])
        # define loss function and optimizer
        loss_fn = nn.MSELoss()
        optimizer = optim.SGD(ddp_model.parameters(), lr=0.001)

        # forward pass
        outputs = ddp_model(torch.randn(20, 10).to(device))
        # backward pass
        labels = torch.randn(20, 5).to(device)
        loss_fn(outputs, labels).backward()
        # update parameters
        optimizer.step()


    def main():
        n_npus = torch.npu.device_count()
        assert n_npus >= 2, f"Requires at least 2 NPUs to run, but got {n_npus}"
        world_size = n_npus
        mp.spawn(example, args=(world_size,), nprocs=world_size, join=True)


    if __name__ == "__main__":
        main()
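上述脚本通过 ``mp.spawn`` 自行拉起多进程,直接用 python 运行即可。下面给出一种可能的启动方式作为示意,其中脚本名 ``ddp_demo.py`` 为假设,请替换为实际文件名:

.. code-block:: shell
    :linenos:

    # 查看本机可用的NPU(需已安装驱动与固件)
    npu-smi info
    # 启动训练,脚本内部会按可用NPU数量拉起相应数量的进程
    python ddp_demo.py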
1.2 FSDP
^^^^^^^^^^

.. code-block:: python
    :linenos:
    :emphasize-lines: 11,12,31,35,39,47,49

    # encoding: UTF-8

    import os
    import torch
    import torch.distributed as dist
    import torch.multiprocessing as mp
    import torch.nn as nn
    import torch.optim as optim
    from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

    # 引入torch-npu包
    import torch_npu


    class ToyModel(nn.Module):
        def __init__(self):
            super(ToyModel, self).__init__()
            self.net1 = nn.Linear(10, 10)
            self.relu = nn.ReLU()
            self.net2 = nn.Linear(10, 5)

        def forward(self, x):
            return self.net2(self.relu(self.net1(x)))


    def setup(rank, world_size):
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = "29500"

        # initialize the process group
        dist.init_process_group("hccl", rank=rank, world_size=world_size)


    def example(rank, world_size):
        device = torch.device("npu:{}".format(rank))
        # create default process group
        setup(rank, world_size)
        # create local model
        model = ToyModel().to(device)
        # construct FSDP model
        fsdp_model = FSDP(model, device_id=rank)
        # define loss function and optimizer
        loss_fn = nn.MSELoss()
        optimizer = optim.SGD(fsdp_model.parameters(), lr=0.001)

        # forward pass
        outputs = fsdp_model(torch.randn(20, 10).to(device))
        # backward pass
        labels = torch.randn(20, 5).to(device)
        loss_fn(outputs, labels).backward()
        # update parameters
        optimizer.step()


    def main():
        n_npus = torch.npu.device_count()
        assert n_npus >= 2, f"Requires at least 2 NPUs to run, but got {n_npus}"
        world_size = n_npus
        mp.spawn(example, args=(world_size,), nprocs=world_size, join=True)


    if __name__ == "__main__":
        main()
diff --git a/_sources/sources/pytorch/faq.rst.txt b/_sources/sources/pytorch/faq.rst.txt
new file mode 100644
index 0000000..02aebd5
--- /dev/null
+++ b/_sources/sources/pytorch/faq.rst.txt
@@ -0,0 +1,11 @@
FAQ
=========

微信群
-----------

添加“开源小助手”微信,根据提示让小助手拉入群聊。

.. image:: ../../_static/images/pytorch_wechat.jpg
    :width: 400px
    :align: left
diff --git a/_sources/sources/pytorch/index.rst.txt b/_sources/sources/pytorch/index.rst.txt
new file mode 100644
index 0000000..4702187
--- /dev/null
+++ b/_sources/sources/pytorch/index.rst.txt
@@ -0,0 +1,11 @@
PyTorch
===========

.. toctree::
    :maxdepth: 2

    install.rst
    quick_start.rst
    examples.rst
    api_doc.rst
    faq.rst
diff --git a/_sources/sources/pytorch/install.rst.txt b/_sources/sources/pytorch/install.rst.txt
new file mode 100644
index 0000000..be8d5e5
--- /dev/null
+++ b/_sources/sources/pytorch/install.rst.txt
@@ -0,0 +1,122 @@
安装指南
===========================

跟随指导,安装在NPU上运行的PyTorch版本。


1. 选择需要安装的 PyTorch 版本
------------------------------
准备安装 PyTorch:

.. raw:: html

+
+
+
[此处原为一个交互式选择控件(HTML):可选择 PyTorch版本(2.3.1 / 2.2.0 / 2.1.0)、PyTorch-NPU版本、CANN-toolkit版本、CPU架构(x86-64 / aarch64)与安装方式(Docker / pip / 源码构建),并据此给出对应的安装命令。]
+
+
+
+
+ + +2. 安装 PyTorch +---------------- + +.. warning:: + + 如果使用了非CANN安装时的Python环境(如Conda),请确保CANN-toolkit依赖的Python包在该环境中已经 `安装 <../ascend/quick_install.html>`_ 。 +.. raw:: html + +
+
+

[以下为随上方“安装方式”选择而切换的说明(HTML),可恢复的文字内容汇总如下。]

备注:请确保已经根据上述表格建议安装了对应的CANN-toolkit版本以及与之匹配的驱动和固件,并应用了CANN-toolkit环境变量。

2.1 环境依赖

- Python 3.8 ~ 3.10
- 支持C++17的编译器,例如clang 或者 gcc(9.4.0及以上)
- Conda

备注:请确认CXX11_ABI是关闭的,如果无法确定,建议显式关闭:``export _GLIBCXX_USE_CXX11_ABI=0``

2.2 构建(对应的安装/构建命令由原页面按所选版本动态生成)
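原页面会根据所选的 PyTorch / PyTorch-NPU / CANN-toolkit 组合动态生成具体命令,此处未能完整恢复。下面仅以 pip 安装方式给出一个命令示意(版本号为举例,实际请以选择器给出的组合为准),安装完成后可按下文“3. 验证安装结果”进行校验:

.. code-block:: shell
    :linenos:

    # 安装与CANN-toolkit版本匹配的 torch 与 torch_npu(版本号仅为示例)
    pip3 install torch==2.1.0
    pip3 install torch_npu==2.1.0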

+
+

+                
+
+
+ + +3. 验证安装结果 +------------------ + +.. code-block:: python + :linenos: + + import torch + import torch_npu + + x = torch.randn(2, 2).npu() + y = torch.randn(2, 2).npu() + z = x.mm(y) + + print(z) + +程序能够成功打印矩阵Z的值即为安装成功。 diff --git a/_sources/sources/pytorch/quick_start.rst.txt b/_sources/sources/pytorch/quick_start.rst.txt new file mode 100644 index 0000000..136bd2c --- /dev/null +++ b/_sources/sources/pytorch/quick_start.rst.txt @@ -0,0 +1,273 @@ +快速开始 +=========================== + +.. note:: + + 在运行下述示例之前,需要您已经安装了PyTorch-NPU环境,有关环境安装,请参考 :doc:`./install` + +一般来说,要在代码中使用NPU进行训练推理,需要做以下更改: + +#. 导入torch_npu扩展包 ``import torch_npu`` +#. 将模型,以及模型输入上传到NPU上 + +.. code-block:: python + :linenos: + + device= torch.device("npu") + model = model.to(device) + input = input.to(device) + +下面的实例演示了如何使用NPU进行训练和推理任务: + +1. 单卡训练 +----------------------- +以下代码使用了cifar10数据集在NPU上训练模型(截取自 `PyTorch tutorials `_),请关注高亮的内容。 + +.. code-block:: python + :linenos: + :emphasize-lines: 20,21,23,24,25,82,83,107,108,144,145,169,170 + + """ + Training an image classifier + ---------------------------- + + We will do the following steps in order: + + 1. Load and normalize the CIFAR10 training and test datasets using + ``torchvision`` + 1. Define a Convolutional Neural Network + 2. Define a loss function + 3. Train the network on the training data + 4. Test the network on the test data + + 5. Load and normalize CIFAR10 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + Using ``torchvision``, it’s extremely easy to load CIFAR10. + """ + import torch + # 引入torch-npu包 + import torch_npu + + # 定义device + device = torch.device('npu:0' if torch.npu.is_available() else 'cpu') + print(device) + + import torchvision + import torchvision.transforms as transforms + + ######################################################################## + # The output of torchvision datasets are PILImage images of range [0, 1]. + # We transform them to Tensors of normalized range [-1, 1]. + transform = transforms.Compose( + [transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + + batch_size = 4 + + trainset = torchvision.datasets.CIFAR10(root='./data', train=True, + download=True, transform=transform) + trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, + shuffle=True, num_workers=2) + + testset = torchvision.datasets.CIFAR10(root='./data', train=False, + download=True, transform=transform) + testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, + shuffle=False, num_workers=2) + + classes = ('plane', 'car', 'bird', 'cat', + 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') + + ######################################################################## + # 2. Define a Convolutional Neural Network + # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + # Copy the neural network from the Neural Networks section before and modify it to + # take 3-channel images (instead of 1-channel images as it was defined). 
+ import torch.nn as nn + import torch.nn.functional as F + + + class Net(nn.Module): + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + x = self.pool(F.relu(self.conv1(x))) + x = self.pool(F.relu(self.conv2(x))) + x = torch.flatten(x, 1) # flatten all dimensions except batch + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = self.fc3(x) + return x + + net = Net() + + # 将模型加载到NPU上 + net.to(device) + + ######################################################################## + # 3. Define a Loss function and optimizer + # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + # Let's use a Classification Cross-Entropy loss and SGD with momentum. + import torch.optim as optim + + criterion = nn.CrossEntropyLoss() + optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) + + ######################################################################## + # 4. Train the network + # ^^^^^^^^^^^^^^^^^^^^ + # + # This is when things start to get interesting. + # We simply have to loop over our data iterator, and feed the inputs to the + # network and optimize. + + for epoch in range(2): # loop over the dataset multiple times + + running_loss = 0.0 + for i, data in enumerate(trainloader, 0): + # get the inputs; data is a list of [inputs, labels] + # 将input数据发送到NPU上 + inputs, labels = data[0].to(device), data[1].to(device) + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + backward + optimize + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + # print statistics + running_loss += loss.item() + if i % 2000 == 1999: # print every 2000 mini-batches + print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}') + running_loss = 0.0 + + print('Finished Training') + + ######################################################################## + # 5. Test the network on the test data + # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + # + # We have trained the network for 2 passes over the training dataset. + # But we need to check if the network has learnt anything at all. + # + # We will check this by predicting the class label that the neural network + # outputs, and checking it against the ground-truth. If the prediction is + # correct, we add the sample to the list of correct predictions. + # + # Let us look at how the network performs on the whole dataset. + correct = 0 + total = 0 + # since we're not training, we don't need to calculate the gradients for our outputs + with torch.no_grad(): + for data in testloader: + # 将input数据发送到NPU上 + images, labels = data[0].to(device), data[1].to(device) + # calculate outputs by running images through the network + outputs = net(images) + # the class with the highest energy is what we choose as prediction + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %') + ######################################################################## + # That looks way better than chance, which is 10% accuracy (randomly picking + # a class out of 10 classes). + # Seems like the network learnt something. 
+ # + # Hmmm, what are the classes that performed well, and the classes that did + # not perform well: + + # prepare to count predictions for each class + correct_pred = {classname: 0 for classname in classes} + total_pred = {classname: 0 for classname in classes} + + # again no gradients needed + with torch.no_grad(): + for data in testloader: + # 将input数据发送到NPU上 + images, labels = data[0].to(device), data[1].to(device) + outputs = net(images) + _, predictions = torch.max(outputs, 1) + # collect the correct predictions for each class + for label, prediction in zip(labels, predictions): + if label == prediction: + correct_pred[classes[label]] += 1 + total_pred[classes[label]] += 1 + + + # print accuracy for each class + for classname, correct_count in correct_pred.items(): + accuracy = 100 * float(correct_count) / total_pred[classname] + print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %') + +2. 使用DeepSpeed多卡并行训练 +------------------------------- +以下代码使用了cifar10数据集,使用DeepSpeed训练模型在多张NPU卡上进行模型训练(来自 `DeepSpeed Examples `_),自DeepSpeed v0.12.6之后,代码无需任何修改,即可自动检测NPU并进行训练。 + +.. rli:: https://raw.githubusercontent.com/microsoft/DeepSpeedExamples/master/training/cifar/cifar10_deepspeed.py + :language: python + :linenos: + + +3. 使用Transforms进行模型微调 +--------------------------------- +以下代码使用了Transforms对LLM进行微调(来自 `transforms examples `_),自transforms xxx版本以及accelerator 0.21.0版本以后,代码无需任何修改,即可自动检测NPU并进行。 + +.. rli:: https://raw.githubusercontent.com/huggingface/transformers/main/examples/pytorch/language-modeling/run_clm.py + :language: python + :linenos: + + +.. code-block:: shell + :linenos: + + python run_clm.py \ + --model_name_or_path openai-community/gpt2 \ + --train_file path_to_train_file \ + --validation_file path_to_validation_file \ + --per_device_train_batch_size 8 \ + --per_device_eval_batch_size 8 \ + --do_train \ + --do_eval \ + --output_dir /tmp/test-clm + +4. 使用Diffusers进行模型微调 +--------------------------------- +以下代码使用了Diffusers对文生图模型进行微调(来自 `diffusers examples `_),自diffusers v0.27.0版本以后,代码无需任何修改,即可自动检测NPU并进行。 + + +.. rli:: https://raw.githubusercontent.com/huggingface/diffusers/main/examples/text_to_image/train_text_to_image.py + :language: python + :linenos: + + +.. code-block:: shell + :linenos: + + export MODEL_NAME="CompVis/stable-diffusion-v1-4" + export DATASET_NAME="lambdalabs/naruto-blip-captions" + + accelerate launch --mixed_precision="fp16" train_text_to_image.py \ + --pretrained_model_name_or_path=$MODEL_NAME \ + --dataset_name=$DATASET_NAME \ + --use_ema \ + --resolution=512 --center_crop --random_flip \ + --train_batch_size=1 \ + --gradient_accumulation_steps=4 \ + --gradient_checkpointing \ + --max_train_steps=15000 \ + --learning_rate=1e-05 \ + --max_grad_norm=1 \ + --lr_scheduler="constant" --lr_warmup_steps=0 \ + --output_dir="sd-pokemon-model" \ No newline at end of file diff --git a/_sources/sources/sd_webui/index.rst.txt b/_sources/sources/sd_webui/index.rst.txt new file mode 100644 index 0000000..9c910cb --- /dev/null +++ b/_sources/sources/sd_webui/index.rst.txt @@ -0,0 +1,8 @@ +Stable-Diffusion-WebUI +============================ + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/_sources/sources/sd_webui/install.rst.txt b/_sources/sources/sd_webui/install.rst.txt new file mode 100644 index 0000000..f65cd6e --- /dev/null +++ b/_sources/sources/sd_webui/install.rst.txt @@ -0,0 +1,76 @@ +安装指南 +================== + +本文面向昇腾开发者,帮助开发者完成stable-diffusion-webui在昇腾上的安装 + +.. 
note:: + + 请确保环境安装了对应的固件和驱动,详情请参考 `快速安装昇腾环境 <../ascend/quick_install.html>`_。 + + +安装miniconda +---------------- + +.. code-block:: shell + :linenos: + + mkdir -p ~/miniconda3 + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh -O ~/miniconda3/miniconda.sh + bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 + rm -rf ~/miniconda3/miniconda.sh + ~/miniconda3/bin/conda init bash + ~/miniconda3/bin/conda init zsh + +使用conda创建环境 +--------------------- + +.. code-block:: shell + :linenos: + + conda create -n python310 python=3.10.6 + conda activate python310 + +安装stable-diffusion-webui +---------------------------------- + +- 自动安装命令如下: + +.. code-block:: shell + :linenos: + + git clone --branch dev https://github.com/AUTOMATIC1111/stable-diffusion-webui.gitcd stable-diffusion-webui + + #此命令将在首次安装时自动在 Ascend 设备上安装 torch 和 torch_npu。 + ./webui.sh --listen --skip-torch-cuda-test --no-half + + +- 手动安装: + +.. code-block:: shell + :linenos: + + # install stable-diffusion-webui + git clone --branch dev https://github.com/AUTOMATIC1111/stable-diffusion-webui.git + cd stable-diffusion-webui + python -m venv venv + source ./venv/bin/activate + pip install torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu + pip install torch_npu==2.1.0 + pip install https://github.com/openai/CLIP/archive/d50d76daa670286dd6cacf3bcd80b5e4823fc8e1.zip --prefer-binary + pip install https://github.com/mlfoundations/open_clip/archive/bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b.zip + pip install -U -I --no-deps xformers==0.0.23.post1 + pip install install ngrok + mkdir repositories + git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui-assets.git stable-diffusion-webui-assets + git -C stable-diffusion-webui-assets checkout 6f7db241d2f8ba7457bac5ca9753331f0c266917 + git clone https://github.com/Stability-AI/stablediffusion.git stable-diffusion-stability-ai + git -C stable-diffusion-stability-ai checkout cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf + git clone https://github.com/Stability-AI/generative-models.git generative-models + git -C generative-models checkout 45c443b316737a4ab6e40413d7794a7f5657c19f + git clone https://github.com/crowsonkb/k-diffusion.git k-diffusion + git -C k-diffusion checkout ab527a9a6d347f364e3d185ba6d714e22d80cb3c + git clone https://github.com/salesforce/BLIP.git BLIP + git -C BLIP checkout 48211a1594f1321b00f14c9f7a5b4813144b2fb9 + pip install -r requirements.txt + pip install -r requirements_npu.txt + diff --git a/_sources/sources/sd_webui/quick_start.rst.txt b/_sources/sources/sd_webui/quick_start.rst.txt new file mode 100644 index 0000000..0782bac --- /dev/null +++ b/_sources/sources/sd_webui/quick_start.rst.txt @@ -0,0 +1,94 @@ +快速开始 +============ + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及stable-diffusion-webui! + + +参数说明 +--------------- + +主要参数 +<<<<<<<<<<<<<< + +.. figure:: ./images/mainparameters.png + :align: center + +- Stable Diffusion checkpoint + +用于更换模型文件,v1-5-pruned-emaonly.safetensors为stable-diffusion-webui的默认模型文件,更换其他模型文件需自行下载。 + +- Prompt + +正面提示词,构成提示词的基础,直接描述想要生成的图像内容、风格、情感等作为元素权重的关键词,让AI更倾向于在绘图中绘制和Prompt的内容相关的元素。 + +- Negative Prompt + +反向提示词,作用与Prompt相反,反向加权的权重关系,减少某些元素出现的频率,从而约束AI的行为。 + +- Generate + +即开始生成图片按钮。 + +其他参数 +<<<<<<<<<<<<<<<< + +.. 
figure:: ./images/moreparameters.png + :align: center + +- Sampling method + +即采样方法,采样方法本身并没有绝对意义上的优劣之分,只有是否合适这一说: + + + Euler方法,是比较成熟的一种采样方法,效果比较稳定 + + + LMS:这个是最小均方误差算法,这是一个自适应的滤波器。 + + + Heun:这个是建立在欧拉方法基础上的一个在给定初始条件下求解常微分方程的方法。 + + + DPM:这是一个深度学习的PDE(偏微分方程)增强方法。 + +- Sampling Steps + +即采样步长,它并不是越大越好,同样也不是越小越好,太小采样的随机性会很高,太大采样的效率会很低,拒绝概率高。 + +- seed + +seed即为种子,-1时生成一个随机数,这个随机数影响画面的内容,相当于手动初始了神经网络的权重参数,在配合其他相同参数的情况下能得到一个极其类似的结果。 + +- Width & Height + +生成图片的宽和高 + +文生图 +----------------- + +文生图就是根据文字生成图片,主要操作为点击Stable Diffusion checkpoint选择模型,在Prompt和Negative Prompt填入提示词,点击Generate按钮生成图片。 + +以下是根据提示词生成的图片: + +Prompt:a cute cat + +Negative Prompt:deformed, lowres, bad anatomy + +.. figure:: ./images/cat.png + :align: center + +图生图 +-------------------- + +图生图(img2img)是让AI参照现有的图片生图: + +如上传一张真人照片,让AI把他改绘成动漫人物;上传画作线稿,让AI自动上色;上传一张黑白照,让AI把它修复成彩色相片。 + +参数和操作与文生图重叠,这里不在赘述。 + +以下是图片生成的效果: + +Prompt:a cute cat wear a hat + +Negative Prompt:deformed, lowres, bad anatomy + +.. figure:: ./images/catwearhat.png + :align: center diff --git a/_sources/sources/sentence_transformers/index.rst.txt b/_sources/sources/sentence_transformers/index.rst.txt new file mode 100644 index 0000000..c96404b --- /dev/null +++ b/_sources/sources/sentence_transformers/index.rst.txt @@ -0,0 +1,8 @@ +Sentence Transformers +============================= + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/_sources/sources/sentence_transformers/install.rst.txt b/_sources/sources/sentence_transformers/install.rst.txt new file mode 100644 index 0000000..1f521fc --- /dev/null +++ b/_sources/sources/sentence_transformers/install.rst.txt @@ -0,0 +1,30 @@ +安装指南 +=============== + +本教程面向使用 sentence-transformers & 昇腾的开发者,帮助完成昇腾环境下 sentence-transformers 的安装。 + +昇腾环境安装 +--------------- + +请根据已有昇腾产品型号及 CPU 架构等按照 :doc:`快速安装昇腾环境指引 <../ascend/quick_install>` 进行昇腾环境安装。 + +.. warning:: + CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。 + +sentence-transformers 下载安装 +--------------------------------------------------- + +1. 安装项目所需依赖 + +.. code-block:: shell + + pip install sentence-transformers -i https://pypi.tuna.tsinghua.edu.cn/simple + +2. 安装 torch_npu + +.. code-block:: shell + + pip install torch==2.1.0 torch_npu==2.1.0.post6 -i https://pypi.tuna.tsinghua.edu.cn/simple + +.. hint:: + torch_npu 的版本需要匹配 torch 的版本,详细信息请参考:`Ascend Extension for PyTorch `_。 diff --git a/_sources/sources/sentence_transformers/quick_start.rst.txt b/_sources/sources/sentence_transformers/quick_start.rst.txt new file mode 100644 index 0000000..366007f --- /dev/null +++ b/_sources/sources/sentence_transformers/quick_start.rst.txt @@ -0,0 +1,46 @@ +快速开始 +=============== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 sentence-transformers ! + +本教程以 `all-MiniLM-L6-v2` 模型为例,讲述如何使用 sentence-transformers 在昇腾 NPU 上实现文本数据的 Embedding。 + +前置准备 +--------------- + +本篇样例代码为 sentence-transformers 的官方样例,需提前进行下载: + +.. code-block:: + + git clone https://github.com/UKPLab/sentence-transformers.git + +使用模型 +--------------- + +进入 sentence-transformers 项目目录,依次执行如下命令: + +.. code-block:: + + cd examples/applications/computing-embeddings + python computing_embeddings.py + +出现如下日志则代表执行成功: + +:: + + 2024-10-15 08:11:36 - Use pytorch device_name: npu + 2024-10-15 08:11:36 - Load pretrained SentenceTransformer: all-MiniLM-L6-v2 + [W compiler_depend.ts:623] Warning: expandable_segments currently defaults to false. You can enable this feature by `export PYTORCH_NPU_ALLOC_CONF = expandable_segments:True`. 
(function operator())
    Batches: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s]
    Sentence: This framework generates embeddings for each input sentence
    Embedding: [-0.01375547 -0.04301599 -0.01562478 ...  0.10029524  0.12379668 -0.04230832]

    Sentence: Sentences are passed as a list of string.
    Embedding: [ 0.05640831  0.05488579  0.03137118 ...  0.06652435  0.08493122 -0.03337045]

    Sentence: The quick brown fox jumps over the lazy dog.
    Embedding: [0.04393559 0.05903088 0.04824848 ... 0.05215353 0.05615513 0.10205095]

可以看到该模型成功生成了这些句子对应的 Embedding 向量。
diff --git a/_sources/sources/timm/index.rst.txt b/_sources/sources/timm/index.rst.txt
new file mode 100644
index 0000000..a2141af
--- /dev/null
+++ b/_sources/sources/timm/index.rst.txt
@@ -0,0 +1,8 @@
timm
===========

.. toctree::
    :maxdepth: 2

    install.rst
    quick_start.rst
diff --git a/_sources/sources/timm/install.rst.txt b/_sources/sources/timm/install.rst.txt
new file mode 100644
index 0000000..067b663
--- /dev/null
+++ b/_sources/sources/timm/install.rst.txt
@@ -0,0 +1,70 @@
安装指南
==============

本教程面向使用 pytorch-image-models (timm) & 昇腾的开发者,帮助完成昇腾环境下 timm 的安装。

昇腾环境安装
------------

请根据已有昇腾产品型号及CPU架构等按照 :doc:`快速安装昇腾环境指引 <../ascend/quick_install>` 进行昇腾环境安装。

.. warning::
    CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

Python 环境创建
----------------------

.. code-block:: shell
    :linenos:

    # 创建名为 timm 的 python 3.10 的虚拟环境
    conda create -y -n timm python=3.10
    # 激活虚拟环境
    conda activate timm


timm 安装
----------------------

使用以下指令安装 timm:

.. code-block:: shell
    :linenos:

    pip install timm -i https://pypi.tuna.tsinghua.edu.cn/simple

torch-npu 安装
----------------------

按照 :doc:`torch-npu 安装指引 <../pytorch/install>` 安装 2.2.0 版本 torch 和 torch-npu,或使用以下指令快速安装:

.. code-block:: shell
    :linenos:

    # install the dependencies
    pip3 install attrs numpy==1.26.4 decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions -i https://pypi.tuna.tsinghua.edu.cn/simple
    # install torch and torch-npu
    pip install torch==2.2.0 torch-npu==2.2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple

安装校验
----------------------

使用以下 Python 脚本对 timm 的安装进行校验,正确打印 timm 的版本号和 NPU 卡号说明安装成功。

.. code-block:: python
    :linenos:
    :emphasize-lines: 2

    import torch
    import torch_npu
    import timm

    print("timm version:", timm.version.__version__)
    print("NPU devices:", torch.npu.current_device())

正确回显如下(单卡 NPU 环境):

.. code-block:: shell

    timm version: 1.0.8.dev0
    NPU devices: 0
diff --git a/_sources/sources/timm/quick_start.rst.txt b/_sources/sources/timm/quick_start.rst.txt
new file mode 100644
index 0000000..a985bb6
--- /dev/null
+++ b/_sources/sources/timm/quick_start.rst.txt
@@ -0,0 +1,148 @@
快速开始
==================

.. note::

    阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 timm !

本文档帮助昇腾开发者快速使用 timm × 昇腾 进行训练和推理。

导入 torch-npu
---------------------

首先在入口脚本(如本文档中的 ``train.py``, ``validate.py`` , ``inference.py``)导入 torch 后,导入 torch-npu:

.. code-block:: python
    :linenos:
    :emphasize-lines: 2

    import torch
    import torch_npu


单卡/分布式训练
---------------------

以 ``ImageNet-1000`` 数据集的训练为例,使用以下脚本启动单卡/多卡 NPU 上基于 timm 的图像分类模型训练:

.. note::

    请根据您的 NPU 环境指定 NPU 卡数量 ``num_npus`` 和模型名称/路径 ``model`` ,并替换数据集路径 ``path/to/dataset/ImageNet-1000``

..
code-block:: shell + :linenos: + :emphasize-lines: 1,3 + + num_npus=1 + ./distributed_train.sh $num_npus path/to/dataset/ImageNet-1000 \ + --device npu \ + --model seresnet34 \ + --sched cosine \ + --epochs 150 \ + --warmup-epochs 5 \ + --lr 0.4 \ + --reprob 0.5 \ + --remode pixel \ + --batch-size 256 \ + --amp -j 4 + + +模型验证 +--------------------- + +.. note:: + + 请根据实际情况替换验证集数据路径 ``path/to/data`` 、模型路径 ``path/to/model`` + +.. code-block:: shell + :linenos: + + python validate.py path/to/data --device npu --model path/to/model --batch-size 64 --pretrained + + +正常输出验证过程日志及最终验证结果 ``result`` 说明验证成功,如下为一种示例(根据模型及数据集不同,日志会有区别): + +.. code-block:: shell + + Validating in float32. AMP not enabled. + Loading pretrained weights from Hugging Face hub (timm/tiny_vit_21m_512.dist_in22k_ft_in1k) + [timm/tiny_vit_21m_512.dist_in22k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors. + Model ./model_ckpts/tiny_vit_21m_512 created, param count: 21268120 + Data processing configuration for current model + dataset: + input_size: (3, 512, 512) + interpolation: bicubic + mean: (0.485, 0.456, 0.406) + std: (0.229, 0.224, 0.225) + crop_pct: 1.0 + crop_mode: squash + Test: [ 0/157] Time: 7.083s (7.083s, 9.04/s) Loss: 0.4765 (0.4765) Acc@1: 93.750 ( 93.750) Acc@5: 96.875 ( 96.875) + Test: [ 10/157] Time: 0.400s (1.008s, 63.50/s) Loss: 0.6594 (0.4929) Acc@1: 78.125 ( 87.926) Acc@5: 98.438 ( 98.011) + Test: [ 20/157] Time: 0.399s (0.719s, 89.04/s) Loss: 0.1891 (0.4682) Acc@1: 96.875 ( 89.435) Acc@5: 100.000 ( 98.289) + + ... ... + + * Acc@1 86.040 (13.960) Acc@5 97.750 (2.250) + --result + { + "model": "./model_ckpts/tiny_vit_21m_512", + "top1": 86.04, + "top1_err": 13.96, + "top5": 97.75, + "top5_err": 2.25, + "param_count": 21.27, + "img_size": 512, + "crop_pct": 1.0, + "interpolation": "bicubic" + } + +模型推理 +------------------ + +.. note:: + + 请根据实际情况替换验证集数据路径 ``path/to/data`` 和模型权重路径 ``path/to/checkpoint/model_best.pth.tar`` + + +.. code-block:: shell + :linenos: + :emphasize-lines: 2 + + python inference.py ../open_clip/data/ImageNet-1000/val/ \ + --device npu \ + --batch-size 64 \ + --model ./model_ckpts/tiny_vit_21m_512 \ + --label-type detail \ + --topk 5 + +正常输出验证过程日志及最终验证结果 ``result`` 说明验证成功,如下为一种示例(根据模型及数据集不同,日志会有区别): + +.. code-block:: shell + + Running inference in float32. AMP not enabled. + Loading pretrained weights from Hugging Face hub (timm/tiny_vit_21m_512.dist_in22k_ft_in1k) + [timm/tiny_vit_21m_512.dist_in22k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors. + Model ./model_ckpts/tiny_vit_21m_512 created, param count: 21268120 + Predict: [0/157] Time 6.418 (6.418) + Predict: [10/157] Time 0.394 (0.942) + Predict: [20/157] Time 0.427 (0.708) + + ... ... + + "ILSVRC2012_val_00005844.JPEG":{ + "label":[ + "stinkhorn, carrion fungus: any of various ill-smelling brown-capped fungi of the order Phallales", + "earthstar: any fungus of the family Geastraceae; in form suggesting a puffball whose outer peridium splits into the shape of a star", + "coral fungus: any of numerous fungi of the family Clavariaceae often brightly colored that grow in often intricately branched clusters like coral", + "mushroom: fleshy body of any of numerous edible fungi", + "gyromitra: any fungus of the genus Gyromitra" + ], + "prob":[ + 0.878154695, + 0.0030552391, + 0.0012754521, + 0.0010740706, + 0.000946458 + ] + }, + + ... ... 
\ No newline at end of file diff --git a/_sources/sources/transformers/fine-tune.rst.txt b/_sources/sources/transformers/fine-tune.rst.txt new file mode 100644 index 0000000..e520f20 --- /dev/null +++ b/_sources/sources/transformers/fine-tune.rst.txt @@ -0,0 +1,250 @@ +微调预训练模型 +================== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及transformers! + +大模型微调本质是利用特定领域的数据集对已预训练的大模型进行进一步训练的过程。它旨在优化模型在特定任务上的性能,使模型能够更好地适应和完成特定领域的任务。 +本文在使用transformers库选定相关数据集和预训练模型的基础上,通过超参数调优完成对模型的微调。 + +前置准备 +----------------- + +安装必要库 +<<<<<<<<<<<<<<< + +.. code-block:: shell + :linenos: + + pip install transformers datasets evaluate accelerate scikit-learn + +加载数据集 +<<<<<<<<<<<<<<<<<<< + +模型训练需要使用数据集,这里使用 `Yelp Reviews dataset `_ : + +.. code-block:: python + :linenos: + + from datasets import load_dataset + + # load_dataset 会自动下载数据集并将其保存到本地路径中 + dataset = load_dataset("yelp_review_full") + #输出数据集的第100条数据 + dataset["train"][100] + +输出如下: + +.. code-block:: shell + + {'label': 0, 'text': 'My expectations for McDonalds are t rarely high. But for one to still fail so spectacularly...that takes something special!\\n + The cashier took my friends\'s order, then promptly ignored me. I had to force myself in front of a cashier who opened his register to wait on the + person BEHIND me. I waited over five minutes for a gigantic order that included precisely one kid\'s meal. After watching two people who ordered after + me be handed their food, I asked where mine was. The manager started yelling at the cashiers for \\"serving off their orders\\" when they didn\'t have + their food. But neither cashier was anywhere near those controls, and the manager was the one serving food to customers and clearing the boards.\\nThe + manager was rude when giving me my order. She didn\'t make sure that I had everything ON MY RECEIPT, and never even had the decency to apologize that + I felt I was getting poor service.\\nI\'ve eaten at various McDonalds restaurants for over 30 years. I\'ve worked at more than one location. I expect + bad days, bad moods, and the occasional mistake. But I have yet to have a decent experience at this store. It will remain a place I avoid unless someone + in my party needs to avoid illness from low blood sugar. Perhaps I should go back to the racially biased service of Steak n Shake instead!'} + + +预处理数据集 +<<<<<<<<<<<<<<<<< + +预处理数据集需要使用AutoTokenizer,它用来自动获取与模型匹配的分词器,分词器根据规则将文本拆分为标记,并转换为张量作为模型输入, +下面用到了Meta-Llama-3-8B-Instruct模型,下载模型请转至 `模型获取 <./modeldownload.html>`_,以下是一个示例: + +.. code-block:: python + :linenos: + + from transformers import AutoTokenizer + + tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") + #使用分词器处理文本 + encoded_input = tokenizer("Do not meddle in the affairs of wizards, for they are subtle and quick to anger.") + print(encoded_input) + +输出如下: + +.. code-block:: shell + + {'input_ids': [128000, 5519, 539, 1812, 91485, 304, 279, 22747, 315, 89263, 11, 369, 814, 527, 27545, 323, 4062, 311, 19788, 13], + 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]} + +接着使用dataset.map方法对数据集进行预处理: + +.. code-block:: python + :linenos: + + def tokenize_function(examples): + return tokenizer(examples["text"], padding="max_length", truncation=True) + + tokenized_datasets = dataset.map(tokenize_function, batched=True) + +初次进行预处理需要一定时间,内容如下: + +.. code-block:: shell + :linenos: + + Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding. 
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
    Map: 100%|████████████████████████████████████████████████████████████████████████| 650000/650000 [03:27<00:00, 3139.47 examples/s]
    Map: 100%|██████████████████████████████████████████████████████████████████████████| 50000/50000 [00:15<00:00, 3156.92 examples/s]

训练全部的数据集会耗费更长的时间,通常将其划分为较小的训练集和验证集,以提高训练速度:

.. code-block:: python
    :linenos:

    small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
    small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

    # 下面是加载全训练集和验证集
    # full_train_dataset = tokenized_datasets["train"]
    # full_eval_dataset = tokenized_datasets["test"]

训练
------------

加载模型
<<<<<<<<<

使用AutoModelForCausalLM将自动加载模型:

.. code-block:: python
    :linenos:

    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

超参数调优
<<<<<<<<<<<<<<<<<<<<<

超参数是用于激活不同训练选项的标志,它们定义了关于模型的更高层次的概念,例如模型复杂程度或学习能力,下面使用TrainingArguments类来加载:

.. code-block:: python
    :linenos:

    from transformers import TrainingArguments

    training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")

模型评估
<<<<<<<<<<<<<

模型评估用于衡量模型在给定数据集上的表现,包括准确率、完全匹配率、平均交并比(mean IoU)等,下面是使用方式:

.. code-block:: python
    :linenos:

    import numpy as np
    import sklearn
    import evaluate

    metric = evaluate.load("accuracy")

    #计算预测的准确性,并将预测传递给compute
    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        return metric.compute(predictions=predictions, references=labels)


Trainer
<<<<<<<

使用已加载的模型、训练参数、训练和测试数据集以及评估函数创建一个Trainer对象,并调用trainer.train()来微调模型:

.. code-block:: python
    :linenos:

    from transformers import Trainer

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=small_train_dataset,
        eval_dataset=small_eval_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()


预训练全流程
-------------------

..
code-block:: python + :linenos: + + import torch + import torch_npu + import numpy as np + import sklearn + import evaluate + from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments + from datasets import load_dataset + + model_id = "meta-llama/Meta-Llama-3-8B-Instruct" + device = "npu:0" if torch.npu.is_available() else "cpu" + + # 加载分词器和模型 + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.bfloat16, + device_map="auto", + ).to(device) + + dataset = load_dataset("yelp_review_full") + + #分词函数 + def tokenize_function(examples): + return tokenizer(examples["text"], padding="max_length", truncation=True) + + tokenized_datasets = dataset.map(tokenize_function, batched=True) + + small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000)) + small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000)) + + # 加载评估指标 + metric = evaluate.load("accuracy") + + # 定义评估指标的计算函数 + def compute_metrics(eval_pred): + logits, labels = eval_pred + predictions = np.argmax(logits, axis=-1) + return metric.compute(predictions=predictions, references=labels) + + training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch") + + trainer = Trainer( + model=model, + args=training_args, + train_dataset=small_train_dataset, + eval_dataset=small_eval_dataset, + compute_metrics=compute_metrics, + ) + + trainer.train() + + +训练完成后得到以下结果: + +.. code-block:: shell + :linenos: + + |█████████████████████████████████| [375/375 06:21, Epoch 3/3] + + ===== ============= =============== ====== + Epoch Training Loss Validation Loss Accuracy + ===== ============= =============== ====== + 1 No log 1.155628 0.499000 + 2 No log 0.994618 0.574000 + 3 No log 1.026123 0.590000 + ===== ============= =============== ====== + + TrainOutput(global_step=375, training_loss=1.0557311197916666, metrics={'train_runtime': 384.55, 'train_samples_per_second': 7.801, + 'train_steps_per_second': 0.975, 'total_flos': 789354427392000.0, 'train_loss': 1.0557311197916666, 'epoch': 3.0}) diff --git a/_sources/sources/transformers/index.rst.txt b/_sources/sources/transformers/index.rst.txt new file mode 100644 index 0000000..534ccd4 --- /dev/null +++ b/_sources/sources/transformers/index.rst.txt @@ -0,0 +1,11 @@ +Transformers +================== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst + modeldownload.rst + fine-tune.rst + inference.rst \ No newline at end of file diff --git a/_sources/sources/transformers/inference.rst.txt b/_sources/sources/transformers/inference.rst.txt new file mode 100644 index 0000000..e66aca1 --- /dev/null +++ b/_sources/sources/transformers/inference.rst.txt @@ -0,0 +1,183 @@ +推理 +================== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及transformers! 
+ +在推理阶段,训练好的模型被用于对图像、语音或文本进行分类,也可以用于语言生成、翻译等。 + +本文的模型推理以transformers的pipeline为中心进行介绍,pipelines可以自动加载模型和能够进行任务推理的预处理类,使任何模型进行任何语言、计算机视觉、语音以及多模态任务的推理变得非常简单。 + +pipeline 抽象类 +------------------ + +pipeline 抽象类是所有其他 pipeline 的封装,可以像其他任何 pipeline 一样实例化。 + +pipeline 参数由 task、tokenizer、model、optional 组成: + +- task 将确定返回哪一个 pipeline,比如 text-classification 将会返回 TextClassificationPipeline,image-to-image 将会返回 ImageToImagePipeline。 + +- tokenizer分词器是用来将输入进行编码,str或者PreTrainedTokenizer,如果未提供将使用model参数,如果model也未提供或者非str,将使用config参数,如果config参数也未提供或者非str,将提供task的默认tokenizer。 + +- model是模型,str或者PreTrainedModel,一般为有.bin模型文件的目录。 + +- optional其他参数包括,config、feature_extractor、device、device_map等。 + + +pipeline 使用 +---------------------- + +pipeline适用于音频、计算机视觉、自然语言处理和多模态任务,下面将介绍它在各场景的使用方式。 + +音频 +<<<<<<<<<<<<< + +音频识别 +>>>>>>>>>>>> + +用于提取某些音频中包含的文本,如下创建pipeline,并输入音频文件: + +.. code-block:: python + :linenos: + + from transformers import pipeline + + transcriber = pipeline(task="automatic-speech-recognition") + transcriber("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac") + + #以下为输出示例 + {'text': 'I HAVE A DREAM BUT ONE DAY THIS NATION WILL RISE UP LIVE UP THE TRUE MEANING OF ITS TREES'} + +文本转音频 +>>>>>>>>>>> + +根据输入文本和可选的其他条件输入生成音频文件: + +.. code-block:: python + :linenos: + + from transformers import pipeline + + pipe = pipeline(model="suno/bark-small") + output = pipe("Hey it's HuggingFace on the phone!") + + audio = output["audio"] + sampling_rate = output["sampling_rate"] + +计算机视觉 +<<<<<<<<<<<<<<<<< + +图像分类 +>>>>>>>>>>>>>> + +图像分类可以识别图片特征,并给出分类标签和置信度得分: + +.. code-block:: python + :linenos: + + from transformers import pipeline + + classifier = pipeline(model="microsoft/beit-base-patch16-224-pt22k-ft22k") + classifier("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png") + + #以下为输出示例 + [{'score': 0.442, 'label': 'macaw'}, {'score': 0.088, 'label': 'popinjay'}, {'score': 0.075, 'label': 'parrot'}, {'score': 0.073, 'label': 'parodist, lampooner'}, {'score': 0.046, 'label': 'poll, poll_parrot'}] + +图像转图像 +>>>>>>>>>>>>> + +它可以将图像根据信息生成新图像,以下示例通过图像超分辨率模型将低分辨率图像放大并增强其细节,使其看起来更清晰: + +.. code-block:: python + :linenos: + + from PIL import Image + import requests + from transformers import pipeline + + upscaler = pipeline("image-to-image", model="caidas/swin2SR-classical-sr-x2-64") + img = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw) + img = img.resize((64, 64)) + upscaled_img = upscaler(img) #超分辨率处理 + print(img.size) + print(upscaled_img.size) + + #以下为输出示例 + (64, 64) # 输出原图像的尺寸 + (144, 144) # 输出处理后图像的尺寸 + +自然语言处理 +<<<<<<<<<<<<<<<<< + +文本分类 +>>>>>>>>>>>>>>>>>>> + +根据标签对文本进行分类: + +.. code-block:: shell + :linenos: + + from transformers import pipeline + classifier = pipeline(model="meta-llama/Meta-Llama-3-8B-Instruct") + classifier( + "I have a problem with my iphone that needs to be resolved asap!!", + candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"], + ) + #以下为输出示例 + #{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]} + +文本生成 +>>>>>>>>>>>>>>>>> + +根据文本生成对话响应: + +.. 
code-block:: shell + :linenos: + + from transformers import pipeline + + generator = pipeline(model="HuggingFaceH4/zephyr-7b-beta") + # Zephyr-beta is a conversational model, so let's pass it a chat instead of a single string + generator([{"role": "user", "content": "What is the capital of France? Answer in one word."}], do_sample=False, max_new_tokens=2) + + #以下为输出示例 + [{'generated_text': [{'role': 'user', 'content': 'What is the capital of France? Answer in one word.'}, {'role': 'assistant', 'content': 'Paris'}]}] + +多模态 +<<<<<<<<<<<<<< + +视觉问答 +>>>>>>>>>>>>> + +VQA使用图像和关于该图像的问题进行提问,图像可以是URL或图像的本地路径: + +.. code-block:: shell + :linenos: + + from transformers import pipeline + vqa = pipeline(model="meta-llama/Meta-Llama-3-8B-Instruct") + output = vqa( + image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", + question="What is the invoice number?", + ) + output[0]["score"] = round(output[0]["score"], 3) + + #以下为输出示例 + #[{'score': 0.425, 'answer': 'us-001', 'start': 16, 'end': 16}] + +图像转文本 +>>>>>>>>>>>>>>>>>>>> + +用于预测给定图像的主题: + +.. code-block:: shell + :linenos: + + from transformers import pipeline + + captioner = pipeline(model="ydshieh/vit-gpt2-coco-en") + captioner("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png") + + #以下为输出示例 + [{'generated_text': 'two birds are standing next to each other '}] diff --git a/_sources/sources/transformers/install.rst.txt b/_sources/sources/transformers/install.rst.txt new file mode 100644 index 0000000..023c183 --- /dev/null +++ b/_sources/sources/transformers/install.rst.txt @@ -0,0 +1,90 @@ +安装指南 +=========== + +本文将介绍如何在昇腾环境下使用transfomers,帮助开发者完成transformers的安装。 + +.. note:: + + 请确保环境安装了对应的固件和驱动,详情请参考 `快速安装昇腾环境 <../ascend/quick_install.html>`_。 + +创建虚拟环境 +-------------------- + +首先需要安装并激活python环境: + +.. code-block:: shell + + conda create -n your_env_name python=3.10 + conda activate your_env_name + +同时安装依赖库: + +.. code-block:: shell + + # install torch + pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple torch==2.2.0 + + # install torch-npu + pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple torch-npu==2.2.0 + +安装transformers +---------------------- + +直接使用pip命令进行安装: + +.. code-block:: shell + + pip install -i https://pypi.tuna.tsinghua.edu.cn/simple transformers + +验证安装 +-------------------- + +.. code-block:: python + + from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline + import torch + import torch_npu + + # 检查 NPU 是否可用 + if torch.npu.is_available(): + device = torch.device("npu:0") + print("NPU is available. Using NPU.") + else: + device = torch.device("cpu") + print("NPU is not available. Using CPU.") + + model_id = "bert-base-uncased" + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForSequenceClassification.from_pretrained(model_id) + + model.to(device) + + nlp_pipeline = pipeline( + "sentiment-analysis", + model=model, + tokenizer=tokenizer, + device=0 if torch.npu.is_available() else -1 + ) + + #分析句子情感并输出 + result = nlp_pipeline("This is a test sentence.") + print(result) + + +如果成功运行并输出下面内容,则安装成功: + +.. code-block:: shell + + NPU is available. Using NPU. + Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight'] + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
+ [{'label': 'POSITIVE', 'score': 0.9998704791069031}] + +卸载transformers +--------------------- + +.. code-block:: shell + + pip uninstall transformers + + diff --git a/_sources/sources/transformers/modeldownload.rst.txt b/_sources/sources/transformers/modeldownload.rst.txt new file mode 100644 index 0000000..4e883d8 --- /dev/null +++ b/_sources/sources/transformers/modeldownload.rst.txt @@ -0,0 +1,126 @@ +模型获取 +============== + +本文以Meta-Llama-3-8B-Instruct模型为例,介绍如何进行模型的获取, +该模型获取目前主要有三种方式,Meta官方_,HuggingFace_,hf-mirror_, 下面将详细说明这三种获取模型的方法。 + +Meta官方 +----------------- + +下载模型前需要获取licence,前往 `Meta官网 `_,提供信息获取到许可证,拿到已签名的URL。 + +- 链接类似于下面: + +.. code-block:: shell + :linenos: + + https://download6.llamameta.net/*?Policy=eyJTdGF0ZW1lbnQiOlt7InVuaXF1ZV9oYXNoIjoibGJuYXc0bzdrY2pqNnoxeXZ1N3hmcmNvIiwiUmVzb3VyY2UiOiJodHRwczp + cL1wvZG93bmxvYWQ2LmxsYW1hbWV0YS5uZXRcLyoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE3MTY0MzYyMTF9fX1dfQ__&Signature=KTyc + LZkPxqMYY0XqW047tNN9IWX%7EOxlQbqCsDqmcX0vE8oia3Qej-x6aGFQSJhkHRULu8Efso5Qde8KRiptK5rGh9oLrtMeAS3SID%7EOyk38o9NNLKxWokA7yQxwvUVRqibVMJyhkE8XE + K2HDNftKT9KLaDG8HHFQmGWuhdTJSvCezJIRKWPtzRf0dohepOiOHOcQW%7Ermo7m6iI595PuoX7o3bVYpFYQf1Syrp05XCr9t2-Rzf8xaIYF5-2vFqELFyFyJys%7E5lA4178elcJcU + ImSSokn1IJBARAZ0iLaWDFsuTbvDJmz9j-ccHFJzgDPCMLQjHpK6QfCk4TWGmdyXMg__&Key-Pair-Id=K15QRJLYKIFSLZ&Download-Request-ID=1502880093958574 + +- 之后获取源码,使用以下命令下载并进入到工作目录: + +.. code-block:: shell + :linenos: + + git clone https://github.com/meta-llama/llama3.git + cd llama3 + +- 运行脚本: + +.. code-block:: python + :linenos: + + ./download.sh + +运行时输入上边获取到的URL,即可进行模型的下载。 + + +HuggingFace +-------------------- +HuggingFace同样需要获得licence,访问仓库 `meta-llama/Meta-Llama-3-8B-Instruct `_ ,接受许可后等待请求获得批准即可。 + +得到权限后,点击"文件和版本"标签,下载原始文件夹的内容或通过以下命令行下载: + +- 安装huggingface-hub: + +.. code-block:: shell + + pip install huggingface-hub + +- 下载文件: + +.. code-block:: shell + + huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --include “original/*” --local-dir meta-llama/Meta-Llama-3-8B-Instruct + +以上两种方法国内用户可能无法完成,下面推荐 **国内用户** 的获取模型的方式。 + +hf-mirror +------------------- + +hf-mirror是更适合国内用户获取模型的方式,它是HuggingFace平台的镜像网站, 提供了一个备用的域名来访问HuggingFace的资源和功能, +以 `Qwen2-7B-Instruct `_ 为例(Meta-Llama-3-8B-Instruct同样需要获取license,不方便国内用户, 这里用Qwen2代替说明), 共有三种方法,下面依次进行介绍。 + +直接下载 +<<<<<<<<<<<<<<< + +点击模型的下的 **↓** 图标下载文件,如下: + +.. figure:: ./images/downloadmodel.png + :align: center + +修改镜像源 +<<<<<<<<<<<<<<<<<< + +- 修改环境变量HF_ENDPOINT,该变量会替换huggingface.co域名: + +.. code-block:: shell + :linenos: + + # 临时生效 + export HF_ENDPOINT=https://hf-mirror.com + # 永久生效 + echo export HF_ENDPOINT=https://hf-mirror.com >> ~/.bashrc + + +- 安装huggingface-hub: + +.. code-block:: shell + + pip install huggingface-hub + + +- 下载文件: + +.. code-block:: python + :linenos: + + # huggingface_hub下载单个文件 + from huggingface_hub import hf_hub_download + hf_hub_download(repo_id="Qwen/Qwen2-7B-Instruct", filename="config.json", cache_dir="./your/path/Qwen") + + # huggingface_hub下载整个项目 + from huggingface_hub import snapshot_download + snapshot_download(repo_id="Qwen/Qwen2-7B-Instruct", cache_dir="./your/path/Qwen") + +git lfs +<<<<<<<<<<<<<<<<<<< + +使用以下命令下载模型: + +.. 
code-block:: shell + :linenos: + + # Make sure you have git-lfs installed (https://git-lfs.com) + git lfs install + + git clone https://hf-mirror.com/Qwen/Qwen2-7B-Instruct + + # If you want to clone without large files - just their pointers + # GIT_LFS_SKIP_SMUDGE=1 git clone https://hf-mirror.com/Qwen/Qwen2-7B-Instruct + + +使用以上任意一种方式即可完成模型的获取,将模型保存在本地路径后可以进行 `微调预训练模型 <./fine-tune.html>`_ 和 `推理 <./inference.html>`_ 等操作。 \ No newline at end of file diff --git a/_sources/sources/transformers/quick_start.rst.txt b/_sources/sources/transformers/quick_start.rst.txt new file mode 100644 index 0000000..c044e11 --- /dev/null +++ b/_sources/sources/transformers/quick_start.rst.txt @@ -0,0 +1,131 @@ +快速开始 +============ + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及transformers! + + +本文以Meta-Llama-3-8B-Instruct模型为例,介绍如何通过transformers使用模型进行推理, +针对模型推理transformers提供了 AutoModelForCausalLM_,pipeline_ 两种方式,下面将说明这两种接口的使用方式。 + +.. note:: + + 以下模型用到了Meta-Llama-3-8B-Instruct, 具体可以参考 `模型获取 <./modeldownload.html>`_ 。 + +AutoModelForCausalLM +----------------------------------------------- + +.. code-block:: python + :linenos: + + import torch + import torch_npu + from transformers import AutoModelForCausalLM, AutoTokenizer + + model_id = "meta-llama/Meta-Llama-3-8B-Instruct" + device = "npu:0" if torch.npu.is_available() else "cpu" + + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.bfloat16, + device_map="auto", + ).to(device) + + +pipeline +------------------------- + +.. code-block:: python + :linenos: + + import transformers + import torch + import torch_npu + + model_id = "meta-llama/Meta-Llama-3-8B-Instruct" + device = "npu:0" if torch.npu.is_available() else "cpu" + + pipeline = transformers.pipeline( + "text-generation", + model=model_id, + model_kwargs={"torch_dtype": torch.bfloat16}, + device=device, + ) + + +全流程 +---------- + +.. code-block:: python + :linenos: + + from transformers import AutoModelForCausalLM, AutoTokenizer + import torch + import torch_npu + + #如果提前下载好模型将meta-llama/Meta-Llama-3-8B-Instruct更换为本地地址 + model_id = "meta-llama/Meta-Llama-3-8B-Instruct" + device = "npu:0" if torch.npu.is_available() else "cpu" # 指定使用的设备为 NPU 0 + + # 加载预训练的分词器 + tokenizer = AutoTokenizer.from_pretrained(model_id) + + # 加载预训练的语言模型, 并指定数据类型为bfloat16, 自动选择设备映射 + model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.bfloat16, + device_map="auto", + ).to(device) # 将模型移动到指定的设备 + + # 定义消息列表,包含系统消息和用户消息 + messages = [ + {"role": "system", "content": "You are a housekeeper chatbot who always responds in polite expression!"}, + {"role": "user", "content": "Who are you? what should you do?"}, + ] + + # 使用分词器将消息列表应用到聊天模板中,并转换为张量 + input_ids = tokenizer.apply_chat_template( + messages, + add_generation_prompt=True, + return_tensors="pt" # 返回 PyTorch 张量 + ).to(model.device) + + + # 定义终止标记,包括模型的结束标记 ID 和一个空标记 ID + terminators = [ + tokenizer.eos_token_id, + tokenizer.convert_tokens_to_ids("<|eot_id|>") + ] + + # 生成响应 + outputs = model.generate( + input_ids, + max_new_tokens=256, # 设置生成的最大token + eos_token_id=terminators, + do_sample=True, + temperature=0.6, # 设置采样温度,影响生成的多样性 + top_p=0.9, + ) + + # 获取生成的响应,排除输入的部分 + response = outputs[0][input_ids.shape[-1]:] + print(tokenizer.decode(response, skip_special_tokens=True)) + +输出示例: + +.. code-block:: shell + :linenos: + + Good day to you! 
My name is Housekeeper Helen, and I'm delighted to introduce myself as a friendly and efficient chatbot designed to assist with household tasks and provide helpful information. + As a housekeeper, my primary role is to ensure your home is tidy, organized, and comfortable. I'd be happy to help with: + + * Cleaning and organization tips + * Household chore schedules + * Laundry and ironing guidance + * Home maintenance advice + * And any other domestic-related queries you may have! + + Please feel free to ask me any questions or request my assistance with a specific task. I'm here to help make your life easier and your home sparkle! + diff --git a/_sources/sources/trl/index.rst.txt b/_sources/sources/trl/index.rst.txt new file mode 100644 index 0000000..2f7ce01 --- /dev/null +++ b/_sources/sources/trl/index.rst.txt @@ -0,0 +1,8 @@ +Transformer Reinforcement Learning +=================================================== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/_sources/sources/trl/install.rst.txt b/_sources/sources/trl/install.rst.txt new file mode 100644 index 0000000..f1c74e9 --- /dev/null +++ b/_sources/sources/trl/install.rst.txt @@ -0,0 +1,38 @@ +安装指南 +=============== + +本教程面向使用 TRL (Transformer Reinforcement Learning) & 昇腾的开发者,帮助完成昇腾环境下 TRL 的安装。 + +昇腾环境安装 +--------------- + +请根据已有昇腾产品型号及 CPU 架构等按照 :doc:`快速安装昇腾环境指引 <../ascend/quick_install>` 进行昇腾环境安装。 + +.. warning:: + CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。 + +TRL 下载安装 +--------------- + +1. 安装项目所需依赖 + +.. code-block:: shell + + pip install trl -i https://pypi.tuna.tsinghua.edu.cn/simple + +另外,本项目需要手动安装 transformers 仓库的最新 main 分支,否则可能会出现如下错误: + +.. image:: ./images/image.png + +.. code-block:: shell + + pip install git+https://github.com/huggingface/transformers.git + +2. 安装 torch_npu + +.. code-block:: shell + + pip install torch==2.1.0 torch_npu==2.1.0.post6 -i https://pypi.tuna.tsinghua.edu.cn/simple + +.. hint:: + torch_npu 的版本需要匹配 torch 的版本,详细信息请参考:`Ascend Extension for PyTorch `_。 diff --git a/_sources/sources/trl/quick_start.rst.txt b/_sources/sources/trl/quick_start.rst.txt new file mode 100644 index 0000000..987b5b0 --- /dev/null +++ b/_sources/sources/trl/quick_start.rst.txt @@ -0,0 +1,49 @@ +快速开始 +=============== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 TRL (Transformer Reinforcement Learning) ! + +本教程以 DPO 方法为例,讲述如何使用 TRL 在昇腾 NPU 上进行模型的后训练。 + +前置准备 +--------------- + +本篇样例代码为 TRL 官方样例,需提前进行下载: + +.. code-block:: + + git clone https://github.com/huggingface/trl.git + +模型训练 +--------------- + +进入 TRL 项目目录,依次执行如下命令: + +.. code-block:: + + cd examples/scripts + python dpo.py + +出现如下日志则代表训练成功: + +:: + + Tokenizing train dataset: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 62135/62135 [07:11<00:00, 143.85 examples/s] + Tokenizing eval dataset: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 144.73 examples/s] + Detected kernel version 4.19.90, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. + 0%| ... | 0/3883 [00:00` 进行昇腾环境安装。 + +.. warning:: + CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。 + +Python 环境创建 +---------------------- + +.. 
code-block:: shell + :linenos: + + # 创建名为 wenet 的 python 3.10 的虚拟环境 + conda create -y -n wenet python=3.10 + # 激活虚拟环境 + conda activate wenet + + +WeNet 安装 +---------------------- + +使用以下指令安装带有 torch-npu 的 WeNet 及训练相关依赖: + +.. code-block:: shell + :linenos: + + # 安装带有 torch-npu 的 WeNet + pip install -e .[torch-npu] + + # 安装 WeNet 训练相关依赖 + pip install -r requirements.txt + +请遵循以下 torch-npu 相关库的版本控制: + ++------------+------------------+-----------+ +| Requirement| Minimum | Recommend | ++============+==================+===========+ +| CANN | 8.0.RC2.alpha003 | latest | ++------------+------------------+-----------+ +| torch | 2.1.0 | 2.2.0 | ++------------+------------------+-----------+ +| torch-npu | 2.1.0 | 2.2.0 | ++------------+------------------+-----------+ +| torchaudio | 2.1.0 | 2.2.0 | ++------------+------------------+-----------+ +| deepspeed | 0.13.2 | latest | ++------------+------------------+-----------+ + + + +安装校验 +---------------------- + +使用以下 Python 脚本对 open_clip 的安装进行校验,正确打印 open_clip 的版本号和 NPU 卡号说明安装成功。 + +.. code-block:: python + :linenos: + :emphasize-lines: 5,6 + + import torch + import torch_npu + import timm + + print("timm version:", timm.version.__version__) + print("NPU devices:", torch.npu.current_device()) + +正确回显如下(单卡 NPU 环境): + +.. code-block:: shell + + timm version: 1.0.8.dev0 + NPU devices: 0 diff --git a/_sources/sources/wenet/quick_start.rst.txt b/_sources/sources/wenet/quick_start.rst.txt new file mode 100644 index 0000000..4ae86de --- /dev/null +++ b/_sources/sources/wenet/quick_start.rst.txt @@ -0,0 +1,132 @@ +快速开始 +================== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境及 WeNet ! + +本文档帮助昇腾开发者快速使用 WeNet × 昇腾 进行自动语音识别(Automatic Speech Recognition, ASR)模型的训练、推理和评估等。 + +WeNet 提供了多种数据集及模型的实验脚本,该脚本将实验分为几个阶段,包含数据集的下载、模型的训练、推理、评估等,均存放在 `examples `_ 路径下, +本篇以 aishell-1 数据集的实验为例,基于 WeNet `官方教程 `_ , +详述如何使用 `NPU 实验脚本 `_ 进行从零开始的语音模型训练。 + +首先进入该脚本所在目录下: + +.. code-block:: shell + :linenos: + + cd example/aishell/s0 + +下载数据 +~~~~~~~~~~~~ + +stage -1 阶段将 aishell-1 数据下载到本地路径 ``$data``: + +.. code-block:: shell + :linenos: + + bash run_npu.sh --stage -1 --stop_stage -1 + + +如果已下载数据,请更改 ``run_npu.sh`` 脚本中的变量 ``$data`` 值为实际数据集存放的绝对路径,并从下一阶段开始。 + +准备训练数据 +~~~~~~~~~~~~ + +stage 0 阶段为训练数据准备阶段,将使用 ``local/aishell_data_prep.sh`` 脚本将训练数据重新组织为 ``wav.scp`` 和 ``text`` 两部分。 + +.. note:: + + ``wav.scp`` 每行记录两个制表符分隔的列: ``wav_id`` 和 ``wav_path``, + ``text`` 每行记录两个制表符分隔的列: ``wav_id`` 和 ``text_label``。 + +.. code-block:: shell + :linenos: + + bash run_npu.sh --stage 0 --stop_stage 0 + + +提取最佳 cmvn 特征(可选) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +stage 1 阶段从训练数据中提取 cmvn 特征,本阶段为可选阶段,设置 ``cmvn=false`` 可跳过本阶段。 + +.. code-block:: shell + :linenos: + + bash run_npu.sh --stage 1 --stop_stage 1 + +``tools/compute_cmvn_stats.py`` 用于提取全局 cmvn(倒谱均值和方差归一化)统计数据,用来归一化声学特征。 + +生成 token 字典 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +stage 2 阶段生成训练所需 token 字典,用于 CTC 解码阶段查询,将输出转换为文字。 + +.. code-block:: shell + :linenos: + + bash run_npu.sh --stage 2 --stop_stage 2 + + +准备 WeNet 数据格式 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +stage 3 阶段生成 WeNet 所需格式的文件 ``data.list``: + +.. code-block:: shell + :linenos: + + bash run_npu.sh --stage 3 --stop_stage 3 + +生成的 ``data.list``每一行都是 json 格式,包含 关键词 ``key`` (文件名称), +语音文件地址 ``wav`` 和 对应文本内容 ``txt`` 三个关键数据。如下为一示例: + +.. 
+Model Training
+~~~~~~~~~~~~~~~
+
+Stage 4 is the model training stage. The ``run_npu.sh`` script automatically detects the NPU device IDs and sets the related environment variables, so training on Ascend NPUs can be started directly with the following command:
+
+.. code-block:: shell
+    :linenos:
+
+    bash run_npu.sh --stage 4 --stop_stage 4
+
+To specify the NPU device IDs yourself, change the ``ASCEND_RT_VISIBLE_DEVICES`` variable in the ``run_npu.sh`` script to the desired device IDs.
+
+.. note::
+
+    For resuming training from checkpoints, parameter configuration, and so on, please refer to the `WeNet official documentation `_ .
+
+Testing and Inference
+~~~~~~~~~~~~~~~~~~~~~~
+
+Stage 5 is the model testing and inference stage, which transcribes the audio files of the test set into text:
+
+.. code-block:: shell
+    :linenos:
+
+    bash run_npu.sh --stage 5 --stop_stage 5
+
+In addition, stage 5 provides model averaging: when ``${average_checkpoint}`` is ``true``,
+the best ``${average_num}`` models on the cross-validation set are averaged to generate an enhanced model.
+
+.. note::
+
+    This stage also provides decoding, WER-based model evaluation, and other features; for details, please refer to the WeNet `official documentation `_ .
+
+
+Export the Trained Model
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Stage 6 is the model export stage: ``wenet/bin/export_jit.py`` exports the trained model with ``Libtorch``, and the exported model can be used for inference in other programming languages (such as C++).
+
+.. code-block:: shell
+    :linenos:
+
+    bash run_npu.sh --stage 6 --stop_stage 6
diff --git a/_sources/sources/whisper_cpp/index.rst.txt b/_sources/sources/whisper_cpp/index.rst.txt
new file mode 100644
index 0000000..b71efba
--- /dev/null
+++ b/_sources/sources/whisper_cpp/index.rst.txt
@@ -0,0 +1,8 @@
+Whisper.cpp
+===========
+
+.. toctree::
+   :maxdepth: 2
+
+   install.rst
+   quick_start.rst
diff --git a/_sources/sources/whisper_cpp/install.rst.txt b/_sources/sources/whisper_cpp/install.rst.txt
new file mode 100644
index 0000000..30af9bd
--- /dev/null
+++ b/_sources/sources/whisper_cpp/install.rst.txt
@@ -0,0 +1,53 @@
+Installation Guide
+==================
+
+This tutorial is intended for developers who use Whisper.cpp with Ascend, and walks through installing Whisper.cpp in an Ascend environment.
+
+Ascend Environment Setup
+------------------------
+
+Follow the :doc:`Ascend environment quick-install guide <../ascend/quick_install>` that matches your Ascend product model and CPU architecture to set up the Ascend environment.
+
+.. warning::
+    The minimum supported CANN version is 8.0.rc1. When installing CANN, also install the Kernel operator package.
+
+Build and Install Whisper.cpp
+-----------------------------
+
+1. Clone the Whisper.cpp project
+
+.. code-block:: shell
+    :linenos:
+
+    git clone https://github.com/ggerganov/whisper.cpp.git
+
+2. In the Whisper.cpp project directory, create a build directory and enter it
+
+.. code-block:: shell
+    :linenos:
+
+    mkdir build
+    cd build
+
+
+3. Build the CANN version of Whisper.cpp
+
+.. code-block:: shell
+    :linenos:
+
+    cmake .. -D GGML_CANN=on
+    make -j
+
+
+Installation Verification
+-------------------------
+
+If the build completes without any error messages and prints the following key output, the installation succeeded:
+
+.. code-block:: shell
+
+    [ 90%] Built target quantize
+    [ 95%] Linking CXX executable ../../bin/main
+    [ 95%] Built target main
+    [100%] Linking CXX executable ../../bin/server
+    [100%] Built target server
diff --git a/_sources/sources/whisper_cpp/quick_start.rst.txt b/_sources/sources/whisper_cpp/quick_start.rst.txt
new file mode 100644
index 0000000..6d01503
--- /dev/null
+++ b/_sources/sources/whisper_cpp/quick_start.rst.txt
@@ -0,0 +1,154 @@
+Quick Start
+==================
+
+.. note::
+
+    Before reading this guide, make sure the Ascend environment and Whisper.cpp have been set up by following the :doc:`installation guide <./install>`!
+
+This document helps Ascend developers quickly use Whisper.cpp × Ascend for automatic speech recognition (ASR).
+
+Download a Whisper Model
+------------------------
+
+Whisper is an ASR neural network model trained and open-sourced by OpenAI, and is one of the mainstream models in the ASR field today.
+To run speech recognition with Whisper.cpp, you need to download a Whisper model and load its gguf-format weight file.
+This document provides three ways to obtain a model; choose whichever one suits your needs.
+
+.. note::
+
+    gguf is a binary file format for storing neural network weights, designed for fast loading and saving of models; see the `ggml official documentation `_ for details.
+
+1. Download with the Script
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Use the ``download-ggml-model.sh`` script from the Whisper.cpp project to download a Whisper model that has already been converted to gguf format (a programmatic alternative is sketched right after the command):
+
+.. code-block:: shell
+    :linenos:
+
+    ./download-ggml-model.sh base.en
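+If you prefer to fetch the weights programmatically instead of using the script, the same pre-converted models are hosted in the Hugging Face repository listed in the manual-download section below. A minimal sketch using the ``huggingface_hub`` package (an extra dependency that is not required by Whisper.cpp itself):
+
+.. code-block:: python
+
+    from huggingface_hub import hf_hub_download
+
+    # Download the pre-converted base.en weights from the ggerganov/whisper.cpp
+    # repository and print the local path of the cached file.
+    model_path = hf_hub_download(
+        repo_id="ggerganov/whisper.cpp",
+        filename="ggml-base.en.bin",
+    )
+    print(model_path)
+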
+In the command above, ``base.en`` can be replaced with the name of whichever Whisper model you need. The list of Whisper model names is:
+
+.. code-block:: shell
+    :linenos:
+
+    # Whisper models
+    models="tiny
+    tiny.en
+    tiny-q5_1
+    tiny.en-q5_1
+    base
+    base.en
+    base-q5_1
+    base.en-q5_1
+    small
+    small.en
+    small.en-tdrz
+    small-q5_1
+    small.en-q5_1
+    medium
+    medium.en
+    medium-q5_0
+    medium.en-q5_0
+    large-v1
+    large-v2
+    large-v2-q5_0
+    large-v3
+    large-v3-q5_0"
+
+2. Manual Download
+~~~~~~~~~~~~~~~~~~~~
+
+Whisper models that have already been converted to gguf format can be downloaded from:
+
+- https://huggingface.co/ggerganov/whisper.cpp/tree/main
+- https://ggml.ggerganov.com
+
+3. Convert a Model Yourself
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Choose and download one of the `models provided by OpenAI `_, convert it to a gguf model with the following commands, and move it into the ``./models/`` directory:
+
+.. code-block:: shell
+    :linenos:
+
+    python models/convert-pt-to-ggml.py ~/.cache/whisper/medium.pt ~/path/to/repo/whisper/ ./models/whisper-medium
+    mv ./models/whisper-medium/ggml-model.bin models/ggml-medium.bin
+
+
+Audio File Preprocessing
+------------------------
+
+Use ffmpeg to convert the audio file to be processed into a 16-bit wav file, taking ``samples/gb0.ogg`` as an example:
+
+.. code-block:: shell
+    :linenos:
+
+    ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
+
+
+Automatic Speech Recognition
+----------------------------
+
+Run the following command to perform Whisper.cpp automatic speech recognition on Ascend NPUs:
+
+.. code-block:: shell
+    :linenos:
+
+    ./build/bin/main -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
+
+
+If the recognized text matches the content of the audio, recognition is working correctly. The following is an example of the correct output for the ``samples/jfk.wav`` audio:
+
+.. code-block:: shell
+
+    whisper_init_from_file_with_params_no_state: loading model from 'models/ggml-base.en.bin'
+    whisper_init_with_params_no_state: use gpu = 1
+    whisper_init_with_params_no_state: flash attn = 0
+    whisper_init_with_params_no_state: gpu_device = 0
+    whisper_init_with_params_no_state: dtw = 0
+    whisper_model_load: loading model
+    whisper_model_load: n_vocab = 51864
+    whisper_model_load: n_audio_ctx = 1500
+    whisper_model_load: n_audio_state = 512
+    whisper_model_load: n_audio_head = 8
+    whisper_model_load: n_audio_layer = 6
+    whisper_model_load: n_text_ctx = 448
+    whisper_model_load: n_text_state = 512
+    whisper_model_load: n_text_head = 8
+    whisper_model_load: n_text_layer = 6
+    whisper_model_load: n_mels = 80
+    whisper_model_load: ftype = 1
+    whisper_model_load: qntvr = 0
+    whisper_model_load: type = 2 (base)
+    whisper_model_load: adding 1607 extra tokens
+    whisper_model_load: n_langs = 99
+    whisper_model_load: CPU total size = 147.37 MB
+    whisper_model_load: model size = 147.37 MB
+    whisper_backend_init_gpu: using CANN backend
+    whisper_init_state: kv self size = 18.87 MB
+    whisper_init_state: kv cross size = 18.87 MB
+    whisper_init_state: kv pad size = 3.15 MB
+    whisper_init_state: compute buffer (conv) = 16.75 MB
+    whisper_init_state: compute buffer (encode) = 131.94 MB
+    whisper_init_state: compute buffer (cross) = 5.17 MB
+    whisper_init_state: compute buffer (decode) = 153.13 MB
+
+    system_info: n_threads = 8 / 192 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | METAL = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | CUDA = 0 | COREML = 0 | OPENVINO = 0 | CANN = 1
+
+    main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 8 threads, 1 processors, 5 beams + best of 5, lang = en, task = transcribe, timestamps = 1 ...
+
+
+    [00:00:00.000 --> 00:00:11.000]   And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
+ + + whisper_print_timings: load time = 223.83 ms + whisper_print_timings: fallbacks = 0 p / 0 h + whisper_print_timings: mel time = 19.95 ms + whisper_print_timings: sample time = 94.43 ms / 131 runs ( 0.72 ms per run) + whisper_print_timings: encode time = 632.05 ms / 1 runs ( 632.05 ms per run) + whisper_print_timings: decode time = 56.30 ms / 2 runs ( 28.15 ms per run) + whisper_print_timings: batchd time = 930.68 ms / 125 runs ( 7.45 ms per run) + whisper_print_timings: prompt time = 0.00 ms / 1 runs ( 0.00 ms per run) + whisper_print_timings: total time = 2854.32 ms diff --git a/_static/_sphinx_javascript_frameworks_compat.js b/_static/_sphinx_javascript_frameworks_compat.js new file mode 100644 index 0000000..8141580 --- /dev/null +++ b/_static/_sphinx_javascript_frameworks_compat.js @@ -0,0 +1,123 @@ +/* Compatability shim for jQuery and underscores.js. + * + * Copyright Sphinx contributors + * Released under the two clause BSD licence + */ + +/** + * small helper function to urldecode strings + * + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL + */ +jQuery.urldecode = function(x) { + if (!x) { + return x + } + return decodeURIComponent(x.replace(/\+/g, ' ')); +}; + +/** + * small helper function to urlencode strings + */ +jQuery.urlencode = encodeURIComponent; + +/** + * This function returns the parsed url parameters of the + * current request. Multiple values per key are supported, + * it will always return arrays of strings for the value parts. + */ +jQuery.getQueryParameters = function(s) { + if (typeof s === 'undefined') + s = document.location.search; + var parts = s.substr(s.indexOf('?') + 1).split('&'); + var result = {}; + for (var i = 0; i < parts.length; i++) { + var tmp = parts[i].split('=', 2); + var key = jQuery.urldecode(tmp[0]); + var value = jQuery.urldecode(tmp[1]); + if (key in result) + result[key].push(value); + else + result[key] = [value]; + } + return result; +}; + +/** + * highlight a given string on a jquery object by wrapping it in + * span elements with the given class name. 
+ */ +jQuery.fn.highlightText = function(text, className) { + function highlight(node, addItems) { + if (node.nodeType === 3) { + var val = node.nodeValue; + var pos = val.toLowerCase().indexOf(text); + if (pos >= 0 && + !jQuery(node.parentNode).hasClass(className) && + !jQuery(node.parentNode).hasClass("nohighlight")) { + var span; + var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.className = className; + } + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + node.parentNode.insertBefore(span, node.parentNode.insertBefore( + document.createTextNode(val.substr(pos + text.length)), + node.nextSibling)); + node.nodeValue = val.substr(0, pos); + if (isInSVG) { + var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect"); + var bbox = node.parentElement.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute('class', className); + addItems.push({ + "parent": node.parentNode, + "target": rect}); + } + } + } + else if (!jQuery(node).is("button, select, textarea")) { + jQuery.each(node.childNodes, function() { + highlight(this, addItems); + }); + } + } + var addItems = []; + var result = this.each(function() { + highlight(this, addItems); + }); + for (var i = 0; i < addItems.length; ++i) { + jQuery(addItems[i].parent).before(addItems[i].target); + } + return result; +}; + +/* + * backward compatibility for jQuery.browser + * This will be supported until firefox bug is fixed. + */ +if (!jQuery.browser) { + jQuery.uaMatch = function(ua) { + ua = ua.toLowerCase(); + + var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || + /(webkit)[ \/]([\w.]+)/.exec(ua) || + /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || + /(msie) ([\w.]+)/.exec(ua) || + ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? 
rv:([\w.]+)|)/.exec(ua) || + []; + + return { + browser: match[ 1 ] || "", + version: match[ 2 ] || "0" + }; + }; + jQuery.browser = {}; + jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; +} diff --git a/_static/ascend_actions.js b/_static/ascend_actions.js new file mode 100644 index 0000000..4113465 --- /dev/null +++ b/_static/ascend_actions.js @@ -0,0 +1,249 @@ +$(document).ready(function () { + $.reset_selection = function (elem) { + elem.parent().children().each(function () { + $(this).removeClass("selected"); + }); + } + + $.get_options = function () { + var options = {}; + $('#col-values').children().each(function () { + var elem = $(this).find(".selected").each(function () { + var id = $(this).attr("id").split("-"); + var category = id[0]; + var value = id[1]; + if (value === "version") { + if (category === "cann") + value = $(this).val(); + else + value = $(this).data('version'); + } + options[category] = value; + }); + }); + return options; + } + + $.get_docker_os_versions = function (options) { + var os_versions = {}; + $.each(docker_images, function (idx, image) { + var tag = image.split(":")[1]; + var tag_items = tag.split("-"); + var npu_type = tag_items[1]; + + var os = tag_items[2]; + var index = os.search(/\d/); + var os_type = os.substring(0, index); + var os_version = os.substring(index); + + if (options['os'] === os_type && options['npu'] === npu_type) { + if (!os_versions[os_type]) { + os_versions[os_type] = new Set(); + } + os_versions[os_type].add(os_version); + } + }); + return os_versions; + } + + $.update_os_verions = function () { + $("#row-os_version").find("div").not(":first").remove(); + var options = $.get_options(); + // update os_versions + var os_versions = $.get_docker_os_versions(options); + var selected_os_versions = os_versions[options['os']]; + if (selected_os_versions == null) { + $('#row-os_version').append('
无可用版本
'); + } else { + var version_length = selected_os_versions.size; + selected_os_versions.forEach(function (version) { + $('#row-os_version').append('
' + version + '
'); + }); + $("#row-os_version div:last-child").addClass("selected"); + } + } + + $.change_options_visible = function () { + var options = $.get_options(); + if (options['install_type'] === 'direct') { + $("#header-os_version").hide(); + $("#row-os_version").hide(); + } else { + $("#header-os_version").show(); + $("#row-os_version").show(); + } + } + + $.update_cann_versions = function () { + // reset table. + var cann_version_select = $('#cann-version'); + cann_version_select.empty(); + $.reset_selection(cann_version_select); + $('#driver-version').text("Driver"); + $('#firmware-version').text("Firmware"); + + var options = $.get_options(); + // not using docker. + if (options['install_type'] === "direct") { + // update select list. + $.each(package_info, function (key, value) { + if (options['npu'] in value) { + cann_version_select.append(new Option("CANN: " + key, key)); + } + }); + } else { + $.each(package_info, function (key, value) { + // not all version has a docker image. + const option_tag = key.toLowerCase() + "-" + options['npu'] + "-" + options['os'] + options['os_version']; + const pkg_info = package_info[key][options['npu']]; + for (const image of docker_images) { + const image_tag = image.split(":")[1]; + if (image_tag.includes(option_tag) && pkg_info && + pkg_info.driver_version && pkg_info.firmware_version) { + cann_version_select.append(new Option("CANN: " + key, key)); + break; + } + } + }); + } + if (cann_version_select.children().length < 1) { + cann_version_select.children().first().text('无可用版本'); + } + cann_version_select.trigger('change'); + } + + $("#col-values").on("click", ".values-element", function () { + id = $(this).attr("id"); + fields = id.split("-"); + if (fields[1] == "version") + return; + + $.reset_selection($(this)); + $(this).addClass("selected"); + + // if os changed, update os version. + if (fields[0] === "os" || fields[0] === "npu") { + $.update_os_verions(); + } + + // if install type changed, update options visible. + if (fields[0] === "install_type") { + $.change_options_visible(); + } + + // update_cann_version if any option changed. + $.update_cann_versions(); + }); + + $("#col-values").on("change", "select", function () { + // select cann, driver, formware versions. + $.reset_selection($(this)); + $('#driver-version').text("Driver"); + $('#firmware-version').text("Firmware"); + + if ($(this).val() !== "na") { + $(this).addClass("selected"); + $('#driver-version').addClass("selected"); + $('#firmware-version').addClass("selected"); + + var options = $.get_options(); + var driver_version = package_info[options['cann']][options['npu']].driver_version; + var firmware_version = package_info[options['cann']][options['npu']].firmware_version; + $('#driver-version').text("Driver: " + driver_version); + $('#driver-version').data("version", driver_version); + $('#firmware-version').text("Firmware: " + firmware_version); + $('#firmware-version').data("version", firmware_version); + } + $.gen_content(); + }); + + $.gen_content = function () { + // instructions need all options selected. + if ($('#cann-version').val() !== "na") { + $('#install-instructions').show(); + } else { + $('#install-instructions').hide(); + return + } + + var options = $.get_options(); + + // install os dependency. 
+ if (options['os'] === 'ubuntu') { + $('#install-dependencies-ubuntu').show(); + $('#install-dependencies-openeuler').hide(); + } else { + $('#install-dependencies-ubuntu').hide(); + $('#install-dependencies-openeuler').show(); + } + + var driver_url = package_info[options['cann']][options['npu']][options['arch']].driver_url; + var firmware_url = package_info[options['cann']][options['npu']].firmware_url; + var cann_url = package_info[options['cann']][options['arch']].url; + var kernel_url = package_info[options['cann']][options['npu']].kernel_url; + + var parts = driver_url.split("/"); + var driver_name = parts[parts.length - 1]; + parts = firmware_url.split("/"); + var firmware_name = parts[parts.length - 1]; + parts = cann_url.split("/"); + var cann_name = parts[parts.length - 1]; + + // download and install driver + $('#codecell6').html('wget "' + driver_url + '"\nsudo sh ' + driver_name + ' --full --install-for-all'); + + // download and install firmware + $('#codecell8').html('wget "' + firmware_url + '"\nsudo sh ' + firmware_name + ' --full'); + + if (options['install_type'] === 'direct') { + // download and install cann + $('#codecell11').html('wget "' + cann_url + '"\nsh ' + cann_name + ' --install'); + + // download and install kernel if exist. + if (kernel_url == null) { + $('#install_kernel_section').hide(); + } + else { + parts = kernel_url.split("/"); + var kernel_name = parts[parts.length - 1]; + $('#install_kernel_section').show(); + // download and install kernel + $('#codecell13').html('wget "' + kernel_url + '"\nsh ' + kernel_name + ' --install'); + } + + $('#use_docker_section').hide(); + $('#install_cann_section').show(); + } else { + const option_tag = options['cann'].toLowerCase() + "-" + options['npu'] + "-" + options['os'] + options['os_version']; + for (let i = 0; i < docker_images.length; i++) { + const image_tag = docker_images[i].split(":")[1]; + if (image_tag.includes(option_tag)) { + const dockerCommand = ` +docker run \\ + --name cann_container \\ + --device /dev/davinci1 \\ + --device /dev/davinci_manager \\ + --device /dev/devmm_svm \\ + --device /dev/hisi_hdc \\ + -v /usr/local/dcmi:/usr/local/dcmi \\ + -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \\ + -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \\ + -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \\ + -v /etc/ascend_install.info:/etc/ascend_install.info \\ + -it ${docker_images[i]} bash + `; + + $('#codecell16').html(dockerCommand.trim()); + break; + } + } + $('#install_cann_section').hide(); + $('#use_docker_section').show(); + } + } + + $.update_os_verions(); + $.change_options_visible(); + $.update_cann_versions(); + +}); diff --git a/_static/basic.css b/_static/basic.css new file mode 100644 index 0000000..7ebbd6d --- /dev/null +++ b/_static/basic.css @@ -0,0 +1,914 @@ +/* + * Sphinx stylesheet -- basic theme. 
+ */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin-top: 10px; +} + +ul.search li { + padding: 5px 0; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 360px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + 
-moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + 
text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + +aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + 
+dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display 
---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/_static/check-solid.svg b/_static/check-solid.svg new file mode 100644 index 0000000..92fad4b --- /dev/null +++ b/_static/check-solid.svg @@ -0,0 +1,4 @@ + + + + diff --git a/_static/clipboard.min.js b/_static/clipboard.min.js new file mode 100644 index 0000000..54b3c46 --- /dev/null +++ b/_static/clipboard.min.js @@ -0,0 +1,7 @@ +/*! + * clipboard.js v2.0.8 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return o}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),c=n.n(e);function a(t){try{return document.execCommand(t)}catch(t){return}}var f=function(t){t=c()(t);return a("cut"),t};var l=function(t){var e,n,o,r=1 + + + + diff --git a/_static/copybutton.css b/_static/copybutton.css new file mode 100644 index 0000000..f1916ec --- /dev/null +++ b/_static/copybutton.css @@ -0,0 +1,94 @@ +/* Copy buttons */ +button.copybtn { + position: absolute; + display: flex; + top: .3em; + right: .3em; + width: 1.7em; + height: 1.7em; + opacity: 0; + transition: opacity 0.3s, border .3s, background-color .3s; + user-select: none; + padding: 0; + border: none; + outline: none; + border-radius: 0.4em; + /* The colors that GitHub uses */ + border: #1b1f2426 1px solid; + background-color: #f6f8fa; + color: #57606a; +} + +button.copybtn.success { + border-color: #22863a; + color: #22863a; +} + +button.copybtn svg { + stroke: currentColor; + width: 1.5em; + height: 1.5em; + padding: 0.1em; +} + +div.highlight { + position: relative; +} + +/* Show the copybutton */ +.highlight:hover button.copybtn, button.copybtn.success { + opacity: 1; +} + +.highlight button.copybtn:hover { + background-color: rgb(235, 235, 235); +} + +.highlight button.copybtn:active { + background-color: rgb(187, 187, 187); +} + +/** + * A minimal CSS-only tooltip copied from: + * https://codepen.io/mildrenben/pen/rVBrpK + * + * To use, write HTML like the following: + * + *

Short

+ */ + .o-tooltip--left { + position: relative; + } + + .o-tooltip--left:after { + opacity: 0; + visibility: hidden; + position: absolute; + content: attr(data-tooltip); + padding: .2em; + font-size: .8em; + left: -.2em; + background: grey; + color: white; + white-space: nowrap; + z-index: 2; + border-radius: 2px; + transform: translateX(-102%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); +} + +.o-tooltip--left:hover:after { + display: block; + opacity: 1; + visibility: visible; + transform: translateX(-100%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); + transition-delay: .5s; +} + +/* By default the copy button shouldn't show up when printing a page */ +@media print { + button.copybtn { + display: none; + } +} diff --git a/_static/copybutton.js b/_static/copybutton.js new file mode 100644 index 0000000..2ea7ff3 --- /dev/null +++ b/_static/copybutton.js @@ -0,0 +1,248 @@ +// Localization support +const messages = { + 'en': { + 'copy': 'Copy', + 'copy_to_clipboard': 'Copy to clipboard', + 'copy_success': 'Copied!', + 'copy_failure': 'Failed to copy', + }, + 'es' : { + 'copy': 'Copiar', + 'copy_to_clipboard': 'Copiar al portapapeles', + 'copy_success': '¡Copiado!', + 'copy_failure': 'Error al copiar', + }, + 'de' : { + 'copy': 'Kopieren', + 'copy_to_clipboard': 'In die Zwischenablage kopieren', + 'copy_success': 'Kopiert!', + 'copy_failure': 'Fehler beim Kopieren', + }, + 'fr' : { + 'copy': 'Copier', + 'copy_to_clipboard': 'Copier dans le presse-papier', + 'copy_success': 'Copié !', + 'copy_failure': 'Échec de la copie', + }, + 'ru': { + 'copy': 'Скопировать', + 'copy_to_clipboard': 'Скопировать в буфер', + 'copy_success': 'Скопировано!', + 'copy_failure': 'Не удалось скопировать', + }, + 'zh-CN': { + 'copy': '复制', + 'copy_to_clipboard': '复制到剪贴板', + 'copy_success': '复制成功!', + 'copy_failure': '复制失败', + }, + 'it' : { + 'copy': 'Copiare', + 'copy_to_clipboard': 'Copiato negli appunti', + 'copy_success': 'Copiato!', + 'copy_failure': 'Errore durante la copia', + } +} + +let locale = 'en' +if( document.documentElement.lang !== undefined + && messages[document.documentElement.lang] !== undefined ) { + locale = document.documentElement.lang +} + +let doc_url_root = DOCUMENTATION_OPTIONS.URL_ROOT; +if (doc_url_root == '#') { + doc_url_root = ''; +} + +/** + * SVG files for our copy buttons + */ +let iconCheck = ` + ${messages[locale]['copy_success']} + + +` + +// If the user specified their own SVG use that, otherwise use the default +let iconCopy = ``; +if (!iconCopy) { + iconCopy = ` + ${messages[locale]['copy_to_clipboard']} + + + +` +} + +/** + * Set up copy/paste for code blocks + */ + +const runWhenDOMLoaded = cb => { + if (document.readyState != 'loading') { + cb() + } else if (document.addEventListener) { + document.addEventListener('DOMContentLoaded', cb) + } else { + document.attachEvent('onreadystatechange', function() { + if (document.readyState == 'complete') cb() + }) + } +} + +const codeCellId = index => `codecell${index}` + +// Clears selected text since ClipboardJS will select the text when copying +const clearSelection = () => { + if (window.getSelection) { + window.getSelection().removeAllRanges() + } else if (document.selection) { + document.selection.empty() + } +} + +// Changes tooltip text for a moment, then changes it back +// We want the timeout of our `success` class to be a bit shorter than the +// tooltip and 
icon change, so that we can hide the icon before changing back. +var timeoutIcon = 2000; +var timeoutSuccessClass = 1500; + +const temporarilyChangeTooltip = (el, oldText, newText) => { + el.setAttribute('data-tooltip', newText) + el.classList.add('success') + // Remove success a little bit sooner than we change the tooltip + // So that we can use CSS to hide the copybutton first + setTimeout(() => el.classList.remove('success'), timeoutSuccessClass) + setTimeout(() => el.setAttribute('data-tooltip', oldText), timeoutIcon) +} + +// Changes the copy button icon for two seconds, then changes it back +const temporarilyChangeIcon = (el) => { + el.innerHTML = iconCheck; + setTimeout(() => {el.innerHTML = iconCopy}, timeoutIcon) +} + +const addCopyButtonToCodeCells = () => { + // If ClipboardJS hasn't loaded, wait a bit and try again. This + // happens because we load ClipboardJS asynchronously. + if (window.ClipboardJS === undefined) { + setTimeout(addCopyButtonToCodeCells, 250) + return + } + + // Add copybuttons to all of our code cells + const COPYBUTTON_SELECTOR = 'div.highlight pre'; + const codeCells = document.querySelectorAll(COPYBUTTON_SELECTOR) + codeCells.forEach((codeCell, index) => { + const id = codeCellId(index) + codeCell.setAttribute('id', id) + + const clipboardButton = id => + `` + codeCell.insertAdjacentHTML('afterend', clipboardButton(id)) + }) + +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? 
+ var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} + + +var copyTargetText = (trigger) => { + var target = document.querySelector(trigger.attributes['data-clipboard-target'].value); + + // get filtered text + let exclude = '.linenos'; + + let text = filterText(target, exclude); + return formatCopyText(text, '', false, true, true, true, '', '') +} + + // Initialize with a callback so we can modify the text before copy + const clipboard = new ClipboardJS('.copybtn', {text: copyTargetText}) + + // Update UI with error/success messages + clipboard.on('success', event => { + clearSelection() + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_success']) + temporarilyChangeIcon(event.trigger) + }) + + clipboard.on('error', event => { + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_failure']) + }) +} + +runWhenDOMLoaded(addCopyButtonToCodeCells) \ No newline at end of file diff --git a/_static/copybutton_funcs.js b/_static/copybutton_funcs.js new file mode 100644 index 0000000..dbe1aaa --- /dev/null +++ b/_static/copybutton_funcs.js @@ -0,0 +1,73 @@ +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +export function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +export function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? 
+ var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} diff --git a/_static/css/badge_only.css b/_static/css/badge_only.css new file mode 100644 index 0000000..88ba55b --- /dev/null +++ b/_static/css/badge_only.css @@ -0,0 +1 @@ +.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions 
.rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions .rst-other-versions .rtd-current-item{font-weight:700}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}#flyout-search-form{padding:6px} \ No newline at end of file diff --git a/_static/css/fonts/Roboto-Slab-Bold.woff b/_static/css/fonts/Roboto-Slab-Bold.woff new file mode 100644 index 0000000..6cb6000 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Bold.woff differ diff --git a/_static/css/fonts/Roboto-Slab-Bold.woff2 b/_static/css/fonts/Roboto-Slab-Bold.woff2 new file mode 100644 index 0000000..7059e23 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Bold.woff2 differ diff --git a/_static/css/fonts/Roboto-Slab-Regular.woff b/_static/css/fonts/Roboto-Slab-Regular.woff new file mode 100644 index 0000000..f815f63 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Regular.woff differ diff --git a/_static/css/fonts/Roboto-Slab-Regular.woff2 b/_static/css/fonts/Roboto-Slab-Regular.woff2 new file mode 100644 index 0000000..f2c76e5 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Regular.woff2 differ diff --git a/_static/css/fonts/fontawesome-webfont.eot b/_static/css/fonts/fontawesome-webfont.eot new file mode 100644 index 0000000..e9f60ca Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.eot differ diff --git a/_static/css/fonts/fontawesome-webfont.svg b/_static/css/fonts/fontawesome-webfont.svg new file mode 100644 index 0000000..855c845 --- /dev/null +++ b/_static/css/fonts/fontawesome-webfont.svg @@ -0,0 +1,2671 @@ + + + + +Created by FontForge 20120731 at Mon Oct 24 17:37:40 2016 + By ,,, +Copyright Dave Gandy 2016. All rights reserved. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_static/css/fonts/fontawesome-webfont.ttf b/_static/css/fonts/fontawesome-webfont.ttf new file mode 100644 index 0000000..35acda2 Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.ttf differ diff --git a/_static/css/fonts/fontawesome-webfont.woff b/_static/css/fonts/fontawesome-webfont.woff new file mode 100644 index 0000000..400014a Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.woff differ diff --git a/_static/css/fonts/fontawesome-webfont.woff2 b/_static/css/fonts/fontawesome-webfont.woff2 new file mode 100644 index 0000000..4d13fc6 Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.woff2 differ diff --git a/_static/css/fonts/lato-bold-italic.woff b/_static/css/fonts/lato-bold-italic.woff new file mode 100644 index 0000000..88ad05b Binary files /dev/null and b/_static/css/fonts/lato-bold-italic.woff differ diff --git a/_static/css/fonts/lato-bold-italic.woff2 b/_static/css/fonts/lato-bold-italic.woff2 new file mode 100644 index 0000000..c4e3d80 Binary files /dev/null and b/_static/css/fonts/lato-bold-italic.woff2 differ diff --git a/_static/css/fonts/lato-bold.woff b/_static/css/fonts/lato-bold.woff new file mode 100644 index 0000000..c6dff51 Binary files /dev/null and b/_static/css/fonts/lato-bold.woff differ diff --git a/_static/css/fonts/lato-bold.woff2 b/_static/css/fonts/lato-bold.woff2 new file mode 100644 index 0000000..bb19504 Binary files /dev/null and b/_static/css/fonts/lato-bold.woff2 differ diff --git a/_static/css/fonts/lato-normal-italic.woff b/_static/css/fonts/lato-normal-italic.woff new file mode 100644 index 0000000..76114bc Binary files /dev/null and b/_static/css/fonts/lato-normal-italic.woff differ diff --git a/_static/css/fonts/lato-normal-italic.woff2 b/_static/css/fonts/lato-normal-italic.woff2 new file mode 100644 index 0000000..3404f37 Binary files /dev/null and b/_static/css/fonts/lato-normal-italic.woff2 differ diff --git a/_static/css/fonts/lato-normal.woff b/_static/css/fonts/lato-normal.woff new file mode 100644 index 0000000..ae1307f Binary files /dev/null and 
b/_static/css/fonts/lato-normal.woff differ diff --git a/_static/css/fonts/lato-normal.woff2 b/_static/css/fonts/lato-normal.woff2 new file mode 100644 index 0000000..3bf9843 Binary files /dev/null and b/_static/css/fonts/lato-normal.woff2 differ diff --git a/_static/css/theme.css b/_static/css/theme.css new file mode 100644 index 0000000..0f14f10 --- /dev/null +++ b/_static/css/theme.css @@ -0,0 +1,4 @@ +html{box-sizing:border-box}*,:after,:before{box-sizing:inherit}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}[hidden],audio:not([controls]){display:none}*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}body{margin:0}a:active,a:hover{outline:0}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:700}blockquote{margin:0}dfn{font-style:italic}ins{background:#ff9;text-decoration:none}ins,mark{color:#000}mark{background:#ff0;font-style:italic;font-weight:700}.rst-content code,.rst-content tt,code,kbd,pre,samp{font-family:monospace,serif;_font-family:courier new,monospace;font-size:1em}pre{white-space:pre}q{quotes:none}q:after,q:before{content:"";content:none}small{font-size:85%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sup{top:-.5em}sub{bottom:-.25em}dl,ol,ul{margin:0;padding:0;list-style:none;list-style-image:none}li{list-style:none}dd{margin:0}img{border:0;-ms-interpolation-mode:bicubic;vertical-align:middle;max-width:100%}svg:not(:root){overflow:hidden}figure,form{margin:0}label{cursor:pointer}button,input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}button,input{line-height:normal}button,input[type=button],input[type=reset],input[type=submit]{cursor:pointer;-webkit-appearance:button;*overflow:visible}button[disabled],input[disabled]{cursor:default}input[type=search]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}textarea{resize:vertical}table{border-collapse:collapse;border-spacing:0}td{vertical-align:top}.chromeframe{margin:.2em 0;background:#ccc;color:#000;padding:.2em 0}.ir{display:block;border:0;text-indent:-999em;overflow:hidden;background-color:transparent;background-repeat:no-repeat;text-align:left;direction:ltr;*line-height:0}.ir br{display:none}.hidden{display:none!important;visibility:hidden}.visuallyhidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.visuallyhidden.focusable:active,.visuallyhidden.focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}.invisible{visibility:hidden}.relative{position:relative}big,small{font-size:100%}@media print{body,html,section{background:none!important}*{box-shadow:none!important;text-shadow:none!important;filter:none!important;-ms-filter:none!important}a,a:visited{text-decoration:underline}.ir a:after,a[href^="#"]:after,a[href^="javascript:"]:after{content:""}blockquote,pre{page-break-inside:avoid}thead{display:table-header-group}img,tr{page-break-inside:avoid}img{max-width:100%!important}@page{margin:.5cm}.rst-content .toctree-wrapper>p.caption,h2,h3,p{orphans:3;widows:3}.rst-content .toctree-wrapper>p.caption,h2,h3{page-break-after:avoid}}.btn,.fa:before,.icon:before,.rst-content .admonition,.rst-content .admonition-title:before,.rst-content .admonition-todo,.rst-content 
.attention,.rst-content .caution,.rst-content .code-block-caption .headerlink:before,.rst-content .danger,.rst-content .eqno .headerlink:before,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-alert,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li button.toctree-expand:before,input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week],select,textarea{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}/*! + * Font Awesome 4.7.0 by @davegandy - http://fontawesome.io - @fontawesome + * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) + */@font-face{font-family:FontAwesome;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713);src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix&v=4.7.0) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#fontawesomeregular) format("svg");font-weight:400;font-style:normal}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{display:inline-block;font:normal normal normal 14px/1 
FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.33333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14286em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.14286em;width:2.14286em;top:.14286em;text-align:center}.fa-li.fa-lg{left:-1.85714em}.fa-border{padding:.2em .25em .15em;border:.08em solid #eee;border-radius:.1em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa-pull-left.icon,.fa.fa-pull-left,.rst-content .code-block-caption .fa-pull-left.headerlink,.rst-content .eqno .fa-pull-left.headerlink,.rst-content .fa-pull-left.admonition-title,.rst-content code.download span.fa-pull-left:first-child,.rst-content dl dt .fa-pull-left.headerlink,.rst-content h1 .fa-pull-left.headerlink,.rst-content h2 .fa-pull-left.headerlink,.rst-content h3 .fa-pull-left.headerlink,.rst-content h4 .fa-pull-left.headerlink,.rst-content h5 .fa-pull-left.headerlink,.rst-content h6 .fa-pull-left.headerlink,.rst-content p .fa-pull-left.headerlink,.rst-content table>caption .fa-pull-left.headerlink,.rst-content tt.download span.fa-pull-left:first-child,.wy-menu-vertical li.current>a button.fa-pull-left.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-left.toctree-expand,.wy-menu-vertical li button.fa-pull-left.toctree-expand{margin-right:.3em}.fa-pull-right.icon,.fa.fa-pull-right,.rst-content .code-block-caption .fa-pull-right.headerlink,.rst-content .eqno .fa-pull-right.headerlink,.rst-content .fa-pull-right.admonition-title,.rst-content code.download span.fa-pull-right:first-child,.rst-content dl dt .fa-pull-right.headerlink,.rst-content h1 .fa-pull-right.headerlink,.rst-content h2 .fa-pull-right.headerlink,.rst-content h3 .fa-pull-right.headerlink,.rst-content h4 .fa-pull-right.headerlink,.rst-content h5 .fa-pull-right.headerlink,.rst-content h6 .fa-pull-right.headerlink,.rst-content p .fa-pull-right.headerlink,.rst-content table>caption .fa-pull-right.headerlink,.rst-content tt.download span.fa-pull-right:first-child,.wy-menu-vertical li.current>a button.fa-pull-right.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-right.toctree-expand,.wy-menu-vertical li button.fa-pull-right.toctree-expand{margin-left:.3em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left,.pull-left.icon,.rst-content .code-block-caption .pull-left.headerlink,.rst-content .eqno .pull-left.headerlink,.rst-content .pull-left.admonition-title,.rst-content code.download span.pull-left:first-child,.rst-content dl dt .pull-left.headerlink,.rst-content h1 .pull-left.headerlink,.rst-content h2 .pull-left.headerlink,.rst-content h3 .pull-left.headerlink,.rst-content h4 .pull-left.headerlink,.rst-content h5 .pull-left.headerlink,.rst-content h6 .pull-left.headerlink,.rst-content p .pull-left.headerlink,.rst-content table>caption .pull-left.headerlink,.rst-content tt.download span.pull-left:first-child,.wy-menu-vertical li.current>a button.pull-left.toctree-expand,.wy-menu-vertical li.on a button.pull-left.toctree-expand,.wy-menu-vertical li button.pull-left.toctree-expand{margin-right:.3em}.fa.pull-right,.pull-right.icon,.rst-content .code-block-caption .pull-right.headerlink,.rst-content .eqno .pull-right.headerlink,.rst-content .pull-right.admonition-title,.rst-content code.download span.pull-right:first-child,.rst-content dl dt 
.pull-right.headerlink,.rst-content h1 .pull-right.headerlink,.rst-content h2 .pull-right.headerlink,.rst-content h3 .pull-right.headerlink,.rst-content h4 .pull-right.headerlink,.rst-content h5 .pull-right.headerlink,.rst-content h6 .pull-right.headerlink,.rst-content p .pull-right.headerlink,.rst-content table>caption .pull-right.headerlink,.rst-content tt.download span.pull-right:first-child,.wy-menu-vertical li.current>a button.pull-right.toctree-expand,.wy-menu-vertical li.on a button.pull-right.toctree-expand,.wy-menu-vertical li button.pull-right.toctree-expand{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s linear infinite;animation:fa-spin 2s linear infinite}.fa-pulse{-webkit-animation:fa-spin 1s steps(8) infinite;animation:fa-spin 1s steps(8) infinite}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";-webkit-transform:scaleX(-1);-ms-transform:scaleX(-1);transform:scaleX(-1)}.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)";-webkit-transform:scaleY(-1);-ms-transform:scaleY(-1);transform:scaleY(-1)}:root .fa-flip-horizontal,:root .fa-flip-vertical,:root .fa-rotate-90,:root .fa-rotate-180,:root .fa-rotate-270{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:""}.fa-music:before{content:""}.fa-search:before,.icon-search:before{content:""}.fa-envelope-o:before{content:""}.fa-heart:before{content:""}.fa-star:before{content:""}.fa-star-o:before{content:""}.fa-user:before{content:""}.fa-film:before{content:""}.fa-th-large:before{content:""}.fa-th:before{content:""}.fa-th-list:before{content:""}.fa-check:before{content:""}.fa-close:before,.fa-remove:before,.fa-times:before{content:""}.fa-search-plus:before{content:""}.fa-search-minus:before{content:""}.fa-power-off:before{content:""}.fa-signal:before{content:""}.fa-cog:before,.fa-gear:before{content:""}.fa-trash-o:before{content:""}.fa-home:before,.icon-home:before{content:""}.fa-file-o:before{content:""}.fa-clock-o:before{content:""}.fa-road:before{content:""}.fa-download:before,.rst-content code.download span:first-child:before,.rst-content tt.download 
span:first-child:before{content:""}.fa-arrow-circle-o-down:before{content:""}.fa-arrow-circle-o-up:before{content:""}.fa-inbox:before{content:""}.fa-play-circle-o:before{content:""}.fa-repeat:before,.fa-rotate-right:before{content:""}.fa-refresh:before{content:""}.fa-list-alt:before{content:""}.fa-lock:before{content:""}.fa-flag:before{content:""}.fa-headphones:before{content:""}.fa-volume-off:before{content:""}.fa-volume-down:before{content:""}.fa-volume-up:before{content:""}.fa-qrcode:before{content:""}.fa-barcode:before{content:""}.fa-tag:before{content:""}.fa-tags:before{content:""}.fa-book:before,.icon-book:before{content:""}.fa-bookmark:before{content:""}.fa-print:before{content:""}.fa-camera:before{content:""}.fa-font:before{content:""}.fa-bold:before{content:""}.fa-italic:before{content:""}.fa-text-height:before{content:""}.fa-text-width:before{content:""}.fa-align-left:before{content:""}.fa-align-center:before{content:""}.fa-align-right:before{content:""}.fa-align-justify:before{content:""}.fa-list:before{content:""}.fa-dedent:before,.fa-outdent:before{content:""}.fa-indent:before{content:""}.fa-video-camera:before{content:""}.fa-image:before,.fa-photo:before,.fa-picture-o:before{content:""}.fa-pencil:before{content:""}.fa-map-marker:before{content:""}.fa-adjust:before{content:""}.fa-tint:before{content:""}.fa-edit:before,.fa-pencil-square-o:before{content:""}.fa-share-square-o:before{content:""}.fa-check-square-o:before{content:""}.fa-arrows:before{content:""}.fa-step-backward:before{content:""}.fa-fast-backward:before{content:""}.fa-backward:before{content:""}.fa-play:before{content:""}.fa-pause:before{content:""}.fa-stop:before{content:""}.fa-forward:before{content:""}.fa-fast-forward:before{content:""}.fa-step-forward:before{content:""}.fa-eject:before{content:""}.fa-chevron-left:before{content:""}.fa-chevron-right:before{content:""}.fa-plus-circle:before{content:""}.fa-minus-circle:before{content:""}.fa-times-circle:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before{content:""}.fa-check-circle:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before{content:""}.fa-question-circle:before{content:""}.fa-info-circle:before{content:""}.fa-crosshairs:before{content:""}.fa-times-circle-o:before{content:""}.fa-check-circle-o:before{content:""}.fa-ban:before{content:""}.fa-arrow-left:before{content:""}.fa-arrow-right:before{content:""}.fa-arrow-up:before{content:""}.fa-arrow-down:before{content:""}.fa-mail-forward:before,.fa-share:before{content:""}.fa-expand:before{content:""}.fa-compress:before{content:""}.fa-plus:before{content:""}.fa-minus:before{content:""}.fa-asterisk:before{content:""}.fa-exclamation-circle:before,.rst-content .admonition-title:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning 
.wy-input-context:before{content:""}.fa-gift:before{content:""}.fa-leaf:before{content:""}.fa-fire:before,.icon-fire:before{content:""}.fa-eye:before{content:""}.fa-eye-slash:before{content:""}.fa-exclamation-triangle:before,.fa-warning:before{content:""}.fa-plane:before{content:""}.fa-calendar:before{content:""}.fa-random:before{content:""}.fa-comment:before{content:""}.fa-magnet:before{content:""}.fa-chevron-up:before{content:""}.fa-chevron-down:before{content:""}.fa-retweet:before{content:""}.fa-shopping-cart:before{content:""}.fa-folder:before{content:""}.fa-folder-open:before{content:""}.fa-arrows-v:before{content:""}.fa-arrows-h:before{content:""}.fa-bar-chart-o:before,.fa-bar-chart:before{content:""}.fa-twitter-square:before{content:""}.fa-facebook-square:before{content:""}.fa-camera-retro:before{content:""}.fa-key:before{content:""}.fa-cogs:before,.fa-gears:before{content:""}.fa-comments:before{content:""}.fa-thumbs-o-up:before{content:""}.fa-thumbs-o-down:before{content:""}.fa-star-half:before{content:""}.fa-heart-o:before{content:""}.fa-sign-out:before{content:""}.fa-linkedin-square:before{content:""}.fa-thumb-tack:before{content:""}.fa-external-link:before{content:""}.fa-sign-in:before{content:""}.fa-trophy:before{content:""}.fa-github-square:before{content:""}.fa-upload:before{content:""}.fa-lemon-o:before{content:""}.fa-phone:before{content:""}.fa-square-o:before{content:""}.fa-bookmark-o:before{content:""}.fa-phone-square:before{content:""}.fa-twitter:before{content:""}.fa-facebook-f:before,.fa-facebook:before{content:""}.fa-github:before,.icon-github:before{content:""}.fa-unlock:before{content:""}.fa-credit-card:before{content:""}.fa-feed:before,.fa-rss:before{content:""}.fa-hdd-o:before{content:""}.fa-bullhorn:before{content:""}.fa-bell:before{content:""}.fa-certificate:before{content:""}.fa-hand-o-right:before{content:""}.fa-hand-o-left:before{content:""}.fa-hand-o-up:before{content:""}.fa-hand-o-down:before{content:""}.fa-arrow-circle-left:before,.icon-circle-arrow-left:before{content:""}.fa-arrow-circle-right:before,.icon-circle-arrow-right:before{content:""}.fa-arrow-circle-up:before{content:""}.fa-arrow-circle-down:before{content:""}.fa-globe:before{content:""}.fa-wrench:before{content:""}.fa-tasks:before{content:""}.fa-filter:before{content:""}.fa-briefcase:before{content:""}.fa-arrows-alt:before{content:""}.fa-group:before,.fa-users:before{content:""}.fa-chain:before,.fa-link:before,.icon-link:before{content:""}.fa-cloud:before{content:""}.fa-flask:before{content:""}.fa-cut:before,.fa-scissors:before{content:""}.fa-copy:before,.fa-files-o:before{content:""}.fa-paperclip:before{content:""}.fa-floppy-o:before,.fa-save:before{content:""}.fa-square:before{content:""}.fa-bars:before,.fa-navicon:before,.fa-reorder:before{content:""}.fa-list-ul:before{content:""}.fa-list-ol:before{content:""}.fa-strikethrough:before{content:""}.fa-underline:before{content:""}.fa-table:before{content:""}.fa-magic:before{content:""}.fa-truck:before{content:""}.fa-pinterest:before{content:""}.fa-pinterest-square:before{content:""}.fa-google-plus-square:before{content:""}.fa-google-plus:before{content:""}.fa-money:before{content:""}.fa-caret-down:before,.icon-caret-down:before,.wy-dropdown 
.caret:before{content:""}.fa-caret-up:before{content:""}.fa-caret-left:before{content:""}.fa-caret-right:before{content:""}.fa-columns:before{content:""}.fa-sort:before,.fa-unsorted:before{content:""}.fa-sort-desc:before,.fa-sort-down:before{content:""}.fa-sort-asc:before,.fa-sort-up:before{content:""}.fa-envelope:before{content:""}.fa-linkedin:before{content:""}.fa-rotate-left:before,.fa-undo:before{content:""}.fa-gavel:before,.fa-legal:before{content:""}.fa-dashboard:before,.fa-tachometer:before{content:""}.fa-comment-o:before{content:""}.fa-comments-o:before{content:""}.fa-bolt:before,.fa-flash:before{content:""}.fa-sitemap:before{content:""}.fa-umbrella:before{content:""}.fa-clipboard:before,.fa-paste:before{content:""}.fa-lightbulb-o:before{content:""}.fa-exchange:before{content:""}.fa-cloud-download:before{content:""}.fa-cloud-upload:before{content:""}.fa-user-md:before{content:""}.fa-stethoscope:before{content:""}.fa-suitcase:before{content:""}.fa-bell-o:before{content:""}.fa-coffee:before{content:""}.fa-cutlery:before{content:""}.fa-file-text-o:before{content:""}.fa-building-o:before{content:""}.fa-hospital-o:before{content:""}.fa-ambulance:before{content:""}.fa-medkit:before{content:""}.fa-fighter-jet:before{content:""}.fa-beer:before{content:""}.fa-h-square:before{content:""}.fa-plus-square:before{content:""}.fa-angle-double-left:before{content:""}.fa-angle-double-right:before{content:""}.fa-angle-double-up:before{content:""}.fa-angle-double-down:before{content:""}.fa-angle-left:before{content:""}.fa-angle-right:before{content:""}.fa-angle-up:before{content:""}.fa-angle-down:before{content:""}.fa-desktop:before{content:""}.fa-laptop:before{content:""}.fa-tablet:before{content:""}.fa-mobile-phone:before,.fa-mobile:before{content:""}.fa-circle-o:before{content:""}.fa-quote-left:before{content:""}.fa-quote-right:before{content:""}.fa-spinner:before{content:""}.fa-circle:before{content:""}.fa-mail-reply:before,.fa-reply:before{content:""}.fa-github-alt:before{content:""}.fa-folder-o:before{content:""}.fa-folder-open-o:before{content:""}.fa-smile-o:before{content:""}.fa-frown-o:before{content:""}.fa-meh-o:before{content:""}.fa-gamepad:before{content:""}.fa-keyboard-o:before{content:""}.fa-flag-o:before{content:""}.fa-flag-checkered:before{content:""}.fa-terminal:before{content:""}.fa-code:before{content:""}.fa-mail-reply-all:before,.fa-reply-all:before{content:""}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:""}.fa-location-arrow:before{content:""}.fa-crop:before{content:""}.fa-code-fork:before{content:""}.fa-chain-broken:before,.fa-unlink:before{content:""}.fa-question:before{content:""}.fa-info:before{content:""}.fa-exclamation:before{content:""}.fa-superscript:before{content:""}.fa-subscript:before{content:""}.fa-eraser:before{content:""}.fa-puzzle-piece:before{content:""}.fa-microphone:before{content:""}.fa-microphone-slash:before{content:""}.fa-shield:before{content:""}.fa-calendar-o:before{content:""}.fa-fire-extinguisher:before{content:""}.fa-rocket:before{content:""}.fa-maxcdn:before{content:""}.fa-chevron-circle-left:before{content:""}.fa-chevron-circle-right:before{content:""}.fa-chevron-circle-up:before{content:""}.fa-chevron-circle-down:before{content:""}.fa-html5:before{content:""}.fa-css3:before{content:""}.fa-anchor:before{content:""}.fa-unlock-alt:before{content:""}.fa-bullseye:before{content:""}.fa-ellipsis-h:before{content:""}.fa-elli
psis-v:before{content:""}.fa-rss-square:before{content:""}.fa-play-circle:before{content:""}.fa-ticket:before{content:""}.fa-minus-square:before{content:""}.fa-minus-square-o:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before{content:""}.fa-level-up:before{content:""}.fa-level-down:before{content:""}.fa-check-square:before{content:""}.fa-pencil-square:before{content:""}.fa-external-link-square:before{content:""}.fa-share-square:before{content:""}.fa-compass:before{content:""}.fa-caret-square-o-down:before,.fa-toggle-down:before{content:""}.fa-caret-square-o-up:before,.fa-toggle-up:before{content:""}.fa-caret-square-o-right:before,.fa-toggle-right:before{content:""}.fa-eur:before,.fa-euro:before{content:""}.fa-gbp:before{content:""}.fa-dollar:before,.fa-usd:before{content:""}.fa-inr:before,.fa-rupee:before{content:""}.fa-cny:before,.fa-jpy:before,.fa-rmb:before,.fa-yen:before{content:""}.fa-rouble:before,.fa-rub:before,.fa-ruble:before{content:""}.fa-krw:before,.fa-won:before{content:""}.fa-bitcoin:before,.fa-btc:before{content:""}.fa-file:before{content:""}.fa-file-text:before{content:""}.fa-sort-alpha-asc:before{content:""}.fa-sort-alpha-desc:before{content:""}.fa-sort-amount-asc:before{content:""}.fa-sort-amount-desc:before{content:""}.fa-sort-numeric-asc:before{content:""}.fa-sort-numeric-desc:before{content:""}.fa-thumbs-up:before{content:""}.fa-thumbs-down:before{content:""}.fa-youtube-square:before{content:""}.fa-youtube:before{content:""}.fa-xing:before{content:""}.fa-xing-square:before{content:""}.fa-youtube-play:before{content:""}.fa-dropbox:before{content:""}.fa-stack-overflow:before{content:""}.fa-instagram:before{content:""}.fa-flickr:before{content:""}.fa-adn:before{content:""}.fa-bitbucket:before,.icon-bitbucket:before{content:""}.fa-bitbucket-square:before{content:""}.fa-tumblr:before{content:""}.fa-tumblr-square:before{content:""}.fa-long-arrow-down:before{content:""}.fa-long-arrow-up:before{content:""}.fa-long-arrow-left:before{content:""}.fa-long-arrow-right:before{content:""}.fa-apple:before{content:""}.fa-windows:before{content:""}.fa-android:before{content:""}.fa-linux:before{content:""}.fa-dribbble:before{content:""}.fa-skype:before{content:""}.fa-foursquare:before{content:""}.fa-trello:before{content:""}.fa-female:before{content:""}.fa-male:before{content:""}.fa-gittip:before,.fa-gratipay:before{content:""}.fa-sun-o:before{content:""}.fa-moon-o:before{content:""}.fa-archive:before{content:""}.fa-bug:before{content:""}.fa-vk:before{content:""}.fa-weibo:before{content:""}.fa-renren:before{content:""}.fa-pagelines:before{content:""}.fa-stack-exchange:before{content:""}.fa-arrow-circle-o-right:before{content:""}.fa-arrow-circle-o-left:before{content:""}.fa-caret-square-o-left:before,.fa-toggle-left:before{content:""}.fa-dot-circle-o:before{content:""}.fa-wheelchair:before{content:""}.fa-vimeo-square:before{content:""}.fa-try:before,.fa-turkish-lira:before{content:""}.fa-plus-square-o:before,.wy-menu-vertical li 
button.toctree-expand:before{content:""}.fa-space-shuttle:before{content:""}.fa-slack:before{content:""}.fa-envelope-square:before{content:""}.fa-wordpress:before{content:""}.fa-openid:before{content:""}.fa-bank:before,.fa-institution:before,.fa-university:before{content:""}.fa-graduation-cap:before,.fa-mortar-board:before{content:""}.fa-yahoo:before{content:""}.fa-google:before{content:""}.fa-reddit:before{content:""}.fa-reddit-square:before{content:""}.fa-stumbleupon-circle:before{content:""}.fa-stumbleupon:before{content:""}.fa-delicious:before{content:""}.fa-digg:before{content:""}.fa-pied-piper-pp:before{content:""}.fa-pied-piper-alt:before{content:""}.fa-drupal:before{content:""}.fa-joomla:before{content:""}.fa-language:before{content:""}.fa-fax:before{content:""}.fa-building:before{content:""}.fa-child:before{content:""}.fa-paw:before{content:""}.fa-spoon:before{content:""}.fa-cube:before{content:""}.fa-cubes:before{content:""}.fa-behance:before{content:""}.fa-behance-square:before{content:""}.fa-steam:before{content:""}.fa-steam-square:before{content:""}.fa-recycle:before{content:""}.fa-automobile:before,.fa-car:before{content:""}.fa-cab:before,.fa-taxi:before{content:""}.fa-tree:before{content:""}.fa-spotify:before{content:""}.fa-deviantart:before{content:""}.fa-soundcloud:before{content:""}.fa-database:before{content:""}.fa-file-pdf-o:before{content:""}.fa-file-word-o:before{content:""}.fa-file-excel-o:before{content:""}.fa-file-powerpoint-o:before{content:""}.fa-file-image-o:before,.fa-file-photo-o:before,.fa-file-picture-o:before{content:""}.fa-file-archive-o:before,.fa-file-zip-o:before{content:""}.fa-file-audio-o:before,.fa-file-sound-o:before{content:""}.fa-file-movie-o:before,.fa-file-video-o:before{content:""}.fa-file-code-o:before{content:""}.fa-vine:before{content:""}.fa-codepen:before{content:""}.fa-jsfiddle:before{content:""}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-ring:before,.fa-life-saver:before,.fa-support:before{content:""}.fa-circle-o-notch:before{content:""}.fa-ra:before,.fa-rebel:before,.fa-resistance:before{content:""}.fa-empire:before,.fa-ge:before{content:""}.fa-git-square:before{content:""}.fa-git:before{content:""}.fa-hacker-news:before,.fa-y-combinator-square:before,.fa-yc-square:before{content:""}.fa-tencent-weibo:before{content:""}.fa-qq:before{content:""}.fa-wechat:before,.fa-weixin:before{content:""}.fa-paper-plane:before,.fa-send:before{content:""}.fa-paper-plane-o:before,.fa-send-o:before{content:""}.fa-history:before{content:""}.fa-circle-thin:before{content:""}.fa-header:before{content:""}.fa-paragraph:before{content:""}.fa-sliders:before{content:""}.fa-share-alt:before{content:""}.fa-share-alt-square:before{content:""}.fa-bomb:before{content:""}.fa-futbol-o:before,.fa-soccer-ball-o:before{content:""}.fa-tty:before{content:""}.fa-binoculars:before{content:""}.fa-plug:before{content:""}.fa-slideshare:before{content:""}.fa-twitch:before{content:""}.fa-yelp:before{content:""}.fa-newspaper-o:before{content:""}.fa-wifi:before{content:""}.fa-calculator:before{content:""}.fa-paypal:before{content:""}.fa-google-wallet:before{content:""}.fa-cc-visa:before{content:""}.fa-cc-mastercard:before{content:""}.fa-cc-discover:before{content:""}.fa-cc-amex:before{content:""}.fa-cc-paypal:before{content:""}.fa-cc-stripe:before{content:""}.fa-bell-slash:before{content:""}.fa-bell-slash-o:before{content:""}.fa-trash:before{content:""}.fa-copyright:before{content:""}.f
a-at:before{content:""}.fa-eyedropper:before{content:""}.fa-paint-brush:before{content:""}.fa-birthday-cake:before{content:""}.fa-area-chart:before{content:""}.fa-pie-chart:before{content:""}.fa-line-chart:before{content:""}.fa-lastfm:before{content:""}.fa-lastfm-square:before{content:""}.fa-toggle-off:before{content:""}.fa-toggle-on:before{content:""}.fa-bicycle:before{content:""}.fa-bus:before{content:""}.fa-ioxhost:before{content:""}.fa-angellist:before{content:""}.fa-cc:before{content:""}.fa-ils:before,.fa-shekel:before,.fa-sheqel:before{content:""}.fa-meanpath:before{content:""}.fa-buysellads:before{content:""}.fa-connectdevelop:before{content:""}.fa-dashcube:before{content:""}.fa-forumbee:before{content:""}.fa-leanpub:before{content:""}.fa-sellsy:before{content:""}.fa-shirtsinbulk:before{content:""}.fa-simplybuilt:before{content:""}.fa-skyatlas:before{content:""}.fa-cart-plus:before{content:""}.fa-cart-arrow-down:before{content:""}.fa-diamond:before{content:""}.fa-ship:before{content:""}.fa-user-secret:before{content:""}.fa-motorcycle:before{content:""}.fa-street-view:before{content:""}.fa-heartbeat:before{content:""}.fa-venus:before{content:""}.fa-mars:before{content:""}.fa-mercury:before{content:""}.fa-intersex:before,.fa-transgender:before{content:""}.fa-transgender-alt:before{content:""}.fa-venus-double:before{content:""}.fa-mars-double:before{content:""}.fa-venus-mars:before{content:""}.fa-mars-stroke:before{content:""}.fa-mars-stroke-v:before{content:""}.fa-mars-stroke-h:before{content:""}.fa-neuter:before{content:""}.fa-genderless:before{content:""}.fa-facebook-official:before{content:""}.fa-pinterest-p:before{content:""}.fa-whatsapp:before{content:""}.fa-server:before{content:""}.fa-user-plus:before{content:""}.fa-user-times:before{content:""}.fa-bed:before,.fa-hotel:before{content:""}.fa-viacoin:before{content:""}.fa-train:before{content:""}.fa-subway:before{content:""}.fa-medium:before{content:""}.fa-y-combinator:before,.fa-yc:before{content:""}.fa-optin-monster:before{content:""}.fa-opencart:before{content:""}.fa-expeditedssl:before{content:""}.fa-battery-4:before,.fa-battery-full:before,.fa-battery:before{content:""}.fa-battery-3:before,.fa-battery-three-quarters:before{content:""}.fa-battery-2:before,.fa-battery-half:before{content:""}.fa-battery-1:before,.fa-battery-quarter:before{content:""}.fa-battery-0:before,.fa-battery-empty:before{content:""}.fa-mouse-pointer:before{content:""}.fa-i-cursor:before{content:""}.fa-object-group:before{content:""}.fa-object-ungroup:before{content:""}.fa-sticky-note:before{content:""}.fa-sticky-note-o:before{content:""}.fa-cc-jcb:before{content:""}.fa-cc-diners-club:before{content:""}.fa-clone:before{content:""}.fa-balance-scale:before{content:""}.fa-hourglass-o:before{content:""}.fa-hourglass-1:before,.fa-hourglass-start:before{content:""}.fa-hourglass-2:before,.fa-hourglass-half:before{content:""}.fa-hourglass-3:before,.fa-hourglass-end:before{content:""}.fa-hourglass:before{content:""}.fa-hand-grab-o:before,.fa-hand-rock-o:before{content:""}.fa-hand-paper-o:before,.fa-hand-stop-o:before{content:""}.fa-hand-scissors-o:before{content:""}.fa-hand-lizard-o:before{content:""}.fa-hand-spock-o:before{content:""}.fa-hand-pointer-o:before{content:""}.fa-hand-peace-o:before{content:""}.fa-trademark:before{content:""}.fa-registered:before{content:""}.fa-creative-commons:before{content:""}.fa-gg:before{content:""}.fa-gg-circle:before{content:""}.fa-trip
advisor:before{content:""}.fa-odnoklassniki:before{content:""}.fa-odnoklassniki-square:before{content:""}.fa-get-pocket:before{content:""}.fa-wikipedia-w:before{content:""}.fa-safari:before{content:""}.fa-chrome:before{content:""}.fa-firefox:before{content:""}.fa-opera:before{content:""}.fa-internet-explorer:before{content:""}.fa-television:before,.fa-tv:before{content:""}.fa-contao:before{content:""}.fa-500px:before{content:""}.fa-amazon:before{content:""}.fa-calendar-plus-o:before{content:""}.fa-calendar-minus-o:before{content:""}.fa-calendar-times-o:before{content:""}.fa-calendar-check-o:before{content:""}.fa-industry:before{content:""}.fa-map-pin:before{content:""}.fa-map-signs:before{content:""}.fa-map-o:before{content:""}.fa-map:before{content:""}.fa-commenting:before{content:""}.fa-commenting-o:before{content:""}.fa-houzz:before{content:""}.fa-vimeo:before{content:""}.fa-black-tie:before{content:""}.fa-fonticons:before{content:""}.fa-reddit-alien:before{content:""}.fa-edge:before{content:""}.fa-credit-card-alt:before{content:""}.fa-codiepie:before{content:""}.fa-modx:before{content:""}.fa-fort-awesome:before{content:""}.fa-usb:before{content:""}.fa-product-hunt:before{content:""}.fa-mixcloud:before{content:""}.fa-scribd:before{content:""}.fa-pause-circle:before{content:""}.fa-pause-circle-o:before{content:""}.fa-stop-circle:before{content:""}.fa-stop-circle-o:before{content:""}.fa-shopping-bag:before{content:""}.fa-shopping-basket:before{content:""}.fa-hashtag:before{content:""}.fa-bluetooth:before{content:""}.fa-bluetooth-b:before{content:""}.fa-percent:before{content:""}.fa-gitlab:before,.icon-gitlab:before{content:""}.fa-wpbeginner:before{content:""}.fa-wpforms:before{content:""}.fa-envira:before{content:""}.fa-universal-access:before{content:""}.fa-wheelchair-alt:before{content:""}.fa-question-circle-o:before{content:""}.fa-blind:before{content:""}.fa-audio-description:before{content:""}.fa-volume-control-phone:before{content:""}.fa-braille:before{content:""}.fa-assistive-listening-systems:before{content:""}.fa-american-sign-language-interpreting:before,.fa-asl-interpreting:before{content:""}.fa-deaf:before,.fa-deafness:before,.fa-hard-of-hearing:before{content:""}.fa-glide:before{content:""}.fa-glide-g:before{content:""}.fa-sign-language:before,.fa-signing:before{content:""}.fa-low-vision:before{content:""}.fa-viadeo:before{content:""}.fa-viadeo-square:before{content:""}.fa-snapchat:before{content:""}.fa-snapchat-ghost:before{content:""}.fa-snapchat-square:before{content:""}.fa-pied-piper:before{content:""}.fa-first-order:before{content:""}.fa-yoast:before{content:""}.fa-themeisle:before{content:""}.fa-google-plus-circle:before,.fa-google-plus-official:before{content:""}.fa-fa:before,.fa-font-awesome:before{content:""}.fa-handshake-o:before{content:""}.fa-envelope-open:before{content:""}.fa-envelope-open-o:before{content:""}.fa-linode:before{content:""}.fa-address-book:before{content:""}.fa-address-book-o:before{content:""}.fa-address-card:before,.fa-vcard:before{content:""}.fa-address-card-o:before,.fa-vcard-o:before{content:""}.fa-user-circle:before{content:""}.fa-user-circle-o:before{content:""}.fa-user-o:before{content:""}.fa-id-badge:before{content:""}.fa-drivers-license:before,.fa-id-card:before{content:""}.fa-drivers-license-o:before,.fa-id-card-o:before{content:""}.fa-quora:before{content:""}.fa-free-code-camp:before{content:""}.fa-telegram:before{content:""}.fa-thermometer-4:b
efore,.fa-thermometer-full:before,.fa-thermometer:before{content:""}.fa-thermometer-3:before,.fa-thermometer-three-quarters:before{content:""}.fa-thermometer-2:before,.fa-thermometer-half:before{content:""}.fa-thermometer-1:before,.fa-thermometer-quarter:before{content:""}.fa-thermometer-0:before,.fa-thermometer-empty:before{content:""}.fa-shower:before{content:""}.fa-bath:before,.fa-bathtub:before,.fa-s15:before{content:""}.fa-podcast:before{content:""}.fa-window-maximize:before{content:""}.fa-window-minimize:before{content:""}.fa-window-restore:before{content:""}.fa-times-rectangle:before,.fa-window-close:before{content:""}.fa-times-rectangle-o:before,.fa-window-close-o:before{content:""}.fa-bandcamp:before{content:""}.fa-grav:before{content:""}.fa-etsy:before{content:""}.fa-imdb:before{content:""}.fa-ravelry:before{content:""}.fa-eercast:before{content:""}.fa-microchip:before{content:""}.fa-snowflake-o:before{content:""}.fa-superpowers:before{content:""}.fa-wpexplorer:before{content:""}.fa-meetup:before{content:""}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-dropdown .caret,.wy-inline-validate.wy-inline-validate-danger .wy-input-context,.wy-inline-validate.wy-inline-validate-info .wy-input-context,.wy-inline-validate.wy-inline-validate-success .wy-input-context,.wy-inline-validate.wy-inline-validate-warning .wy-input-context,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{font-family:inherit}.fa:before,.icon:before,.rst-content .admonition-title:before,.rst-content .code-block-caption .headerlink:before,.rst-content .eqno .headerlink:before,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li button.toctree-expand:before{font-family:FontAwesome;display:inline-block;font-style:normal;font-weight:400;line-height:1;text-decoration:inherit}.rst-content .code-block-caption a .headerlink,.rst-content .eqno a .headerlink,.rst-content a 
.admonition-title,.rst-content code.download a span:first-child,.rst-content dl dt a .headerlink,.rst-content h1 a .headerlink,.rst-content h2 a .headerlink,.rst-content h3 a .headerlink,.rst-content h4 a .headerlink,.rst-content h5 a .headerlink,.rst-content h6 a .headerlink,.rst-content p.caption a .headerlink,.rst-content p a .headerlink,.rst-content table>caption a .headerlink,.rst-content tt.download a span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li a button.toctree-expand,a .fa,a .icon,a .rst-content .admonition-title,a .rst-content .code-block-caption .headerlink,a .rst-content .eqno .headerlink,a .rst-content code.download span:first-child,a .rst-content dl dt .headerlink,a .rst-content h1 .headerlink,a .rst-content h2 .headerlink,a .rst-content h3 .headerlink,a .rst-content h4 .headerlink,a .rst-content h5 .headerlink,a .rst-content h6 .headerlink,a .rst-content p.caption .headerlink,a .rst-content p .headerlink,a .rst-content table>caption .headerlink,a .rst-content tt.download span:first-child,a .wy-menu-vertical li button.toctree-expand{display:inline-block;text-decoration:inherit}.btn .fa,.btn .icon,.btn .rst-content .admonition-title,.btn .rst-content .code-block-caption .headerlink,.btn .rst-content .eqno .headerlink,.btn .rst-content code.download span:first-child,.btn .rst-content dl dt .headerlink,.btn .rst-content h1 .headerlink,.btn .rst-content h2 .headerlink,.btn .rst-content h3 .headerlink,.btn .rst-content h4 .headerlink,.btn .rst-content h5 .headerlink,.btn .rst-content h6 .headerlink,.btn .rst-content p .headerlink,.btn .rst-content table>caption .headerlink,.btn .rst-content tt.download span:first-child,.btn .wy-menu-vertical li.current>a button.toctree-expand,.btn .wy-menu-vertical li.on a button.toctree-expand,.btn .wy-menu-vertical li button.toctree-expand,.nav .fa,.nav .icon,.nav .rst-content .admonition-title,.nav .rst-content .code-block-caption .headerlink,.nav .rst-content .eqno .headerlink,.nav .rst-content code.download span:first-child,.nav .rst-content dl dt .headerlink,.nav .rst-content h1 .headerlink,.nav .rst-content h2 .headerlink,.nav .rst-content h3 .headerlink,.nav .rst-content h4 .headerlink,.nav .rst-content h5 .headerlink,.nav .rst-content h6 .headerlink,.nav .rst-content p .headerlink,.nav .rst-content table>caption .headerlink,.nav .rst-content tt.download span:first-child,.nav .wy-menu-vertical li.current>a button.toctree-expand,.nav .wy-menu-vertical li.on a button.toctree-expand,.nav .wy-menu-vertical li button.toctree-expand,.rst-content .btn .admonition-title,.rst-content .code-block-caption .btn .headerlink,.rst-content .code-block-caption .nav .headerlink,.rst-content .eqno .btn .headerlink,.rst-content .eqno .nav .headerlink,.rst-content .nav .admonition-title,.rst-content code.download .btn span:first-child,.rst-content code.download .nav span:first-child,.rst-content dl dt .btn .headerlink,.rst-content dl dt .nav .headerlink,.rst-content h1 .btn .headerlink,.rst-content h1 .nav .headerlink,.rst-content h2 .btn .headerlink,.rst-content h2 .nav .headerlink,.rst-content h3 .btn .headerlink,.rst-content h3 .nav .headerlink,.rst-content h4 .btn .headerlink,.rst-content h4 .nav .headerlink,.rst-content h5 .btn .headerlink,.rst-content h5 .nav .headerlink,.rst-content h6 .btn .headerlink,.rst-content h6 .nav .headerlink,.rst-content p .btn .headerlink,.rst-content p .nav .headerlink,.rst-content table>caption .btn .headerlink,.rst-content 
table>caption .nav .headerlink,.rst-content tt.download .btn span:first-child,.rst-content tt.download .nav span:first-child,.wy-menu-vertical li .btn button.toctree-expand,.wy-menu-vertical li.current>a .btn button.toctree-expand,.wy-menu-vertical li.current>a .nav button.toctree-expand,.wy-menu-vertical li .nav button.toctree-expand,.wy-menu-vertical li.on a .btn button.toctree-expand,.wy-menu-vertical li.on a .nav button.toctree-expand{display:inline}.btn .fa-large.icon,.btn .fa.fa-large,.btn .rst-content .code-block-caption .fa-large.headerlink,.btn .rst-content .eqno .fa-large.headerlink,.btn .rst-content .fa-large.admonition-title,.btn .rst-content code.download span.fa-large:first-child,.btn .rst-content dl dt .fa-large.headerlink,.btn .rst-content h1 .fa-large.headerlink,.btn .rst-content h2 .fa-large.headerlink,.btn .rst-content h3 .fa-large.headerlink,.btn .rst-content h4 .fa-large.headerlink,.btn .rst-content h5 .fa-large.headerlink,.btn .rst-content h6 .fa-large.headerlink,.btn .rst-content p .fa-large.headerlink,.btn .rst-content table>caption .fa-large.headerlink,.btn .rst-content tt.download span.fa-large:first-child,.btn .wy-menu-vertical li button.fa-large.toctree-expand,.nav .fa-large.icon,.nav .fa.fa-large,.nav .rst-content .code-block-caption .fa-large.headerlink,.nav .rst-content .eqno .fa-large.headerlink,.nav .rst-content .fa-large.admonition-title,.nav .rst-content code.download span.fa-large:first-child,.nav .rst-content dl dt .fa-large.headerlink,.nav .rst-content h1 .fa-large.headerlink,.nav .rst-content h2 .fa-large.headerlink,.nav .rst-content h3 .fa-large.headerlink,.nav .rst-content h4 .fa-large.headerlink,.nav .rst-content h5 .fa-large.headerlink,.nav .rst-content h6 .fa-large.headerlink,.nav .rst-content p .fa-large.headerlink,.nav .rst-content table>caption .fa-large.headerlink,.nav .rst-content tt.download span.fa-large:first-child,.nav .wy-menu-vertical li button.fa-large.toctree-expand,.rst-content .btn .fa-large.admonition-title,.rst-content .code-block-caption .btn .fa-large.headerlink,.rst-content .code-block-caption .nav .fa-large.headerlink,.rst-content .eqno .btn .fa-large.headerlink,.rst-content .eqno .nav .fa-large.headerlink,.rst-content .nav .fa-large.admonition-title,.rst-content code.download .btn span.fa-large:first-child,.rst-content code.download .nav span.fa-large:first-child,.rst-content dl dt .btn .fa-large.headerlink,.rst-content dl dt .nav .fa-large.headerlink,.rst-content h1 .btn .fa-large.headerlink,.rst-content h1 .nav .fa-large.headerlink,.rst-content h2 .btn .fa-large.headerlink,.rst-content h2 .nav .fa-large.headerlink,.rst-content h3 .btn .fa-large.headerlink,.rst-content h3 .nav .fa-large.headerlink,.rst-content h4 .btn .fa-large.headerlink,.rst-content h4 .nav .fa-large.headerlink,.rst-content h5 .btn .fa-large.headerlink,.rst-content h5 .nav .fa-large.headerlink,.rst-content h6 .btn .fa-large.headerlink,.rst-content h6 .nav .fa-large.headerlink,.rst-content p .btn .fa-large.headerlink,.rst-content p .nav .fa-large.headerlink,.rst-content table>caption .btn .fa-large.headerlink,.rst-content table>caption .nav .fa-large.headerlink,.rst-content tt.download .btn span.fa-large:first-child,.rst-content tt.download .nav span.fa-large:first-child,.wy-menu-vertical li .btn button.fa-large.toctree-expand,.wy-menu-vertical li .nav button.fa-large.toctree-expand{line-height:.9em}.btn .fa-spin.icon,.btn .fa.fa-spin,.btn .rst-content .code-block-caption .fa-spin.headerlink,.btn .rst-content .eqno .fa-spin.headerlink,.btn .rst-content 
.fa-spin.admonition-title,.btn .rst-content code.download span.fa-spin:first-child,.btn .rst-content dl dt .fa-spin.headerlink,.btn .rst-content h1 .fa-spin.headerlink,.btn .rst-content h2 .fa-spin.headerlink,.btn .rst-content h3 .fa-spin.headerlink,.btn .rst-content h4 .fa-spin.headerlink,.btn .rst-content h5 .fa-spin.headerlink,.btn .rst-content h6 .fa-spin.headerlink,.btn .rst-content p .fa-spin.headerlink,.btn .rst-content table>caption .fa-spin.headerlink,.btn .rst-content tt.download span.fa-spin:first-child,.btn .wy-menu-vertical li button.fa-spin.toctree-expand,.nav .fa-spin.icon,.nav .fa.fa-spin,.nav .rst-content .code-block-caption .fa-spin.headerlink,.nav .rst-content .eqno .fa-spin.headerlink,.nav .rst-content .fa-spin.admonition-title,.nav .rst-content code.download span.fa-spin:first-child,.nav .rst-content dl dt .fa-spin.headerlink,.nav .rst-content h1 .fa-spin.headerlink,.nav .rst-content h2 .fa-spin.headerlink,.nav .rst-content h3 .fa-spin.headerlink,.nav .rst-content h4 .fa-spin.headerlink,.nav .rst-content h5 .fa-spin.headerlink,.nav .rst-content h6 .fa-spin.headerlink,.nav .rst-content p .fa-spin.headerlink,.nav .rst-content table>caption .fa-spin.headerlink,.nav .rst-content tt.download span.fa-spin:first-child,.nav .wy-menu-vertical li button.fa-spin.toctree-expand,.rst-content .btn .fa-spin.admonition-title,.rst-content .code-block-caption .btn .fa-spin.headerlink,.rst-content .code-block-caption .nav .fa-spin.headerlink,.rst-content .eqno .btn .fa-spin.headerlink,.rst-content .eqno .nav .fa-spin.headerlink,.rst-content .nav .fa-spin.admonition-title,.rst-content code.download .btn span.fa-spin:first-child,.rst-content code.download .nav span.fa-spin:first-child,.rst-content dl dt .btn .fa-spin.headerlink,.rst-content dl dt .nav .fa-spin.headerlink,.rst-content h1 .btn .fa-spin.headerlink,.rst-content h1 .nav .fa-spin.headerlink,.rst-content h2 .btn .fa-spin.headerlink,.rst-content h2 .nav .fa-spin.headerlink,.rst-content h3 .btn .fa-spin.headerlink,.rst-content h3 .nav .fa-spin.headerlink,.rst-content h4 .btn .fa-spin.headerlink,.rst-content h4 .nav .fa-spin.headerlink,.rst-content h5 .btn .fa-spin.headerlink,.rst-content h5 .nav .fa-spin.headerlink,.rst-content h6 .btn .fa-spin.headerlink,.rst-content h6 .nav .fa-spin.headerlink,.rst-content p .btn .fa-spin.headerlink,.rst-content p .nav .fa-spin.headerlink,.rst-content table>caption .btn .fa-spin.headerlink,.rst-content table>caption .nav .fa-spin.headerlink,.rst-content tt.download .btn span.fa-spin:first-child,.rst-content tt.download .nav span.fa-spin:first-child,.wy-menu-vertical li .btn button.fa-spin.toctree-expand,.wy-menu-vertical li .nav button.fa-spin.toctree-expand{display:inline-block}.btn.fa:before,.btn.icon:before,.rst-content .btn.admonition-title:before,.rst-content .code-block-caption .btn.headerlink:before,.rst-content .eqno .btn.headerlink:before,.rst-content code.download span.btn:first-child:before,.rst-content dl dt .btn.headerlink:before,.rst-content h1 .btn.headerlink:before,.rst-content h2 .btn.headerlink:before,.rst-content h3 .btn.headerlink:before,.rst-content h4 .btn.headerlink:before,.rst-content h5 .btn.headerlink:before,.rst-content h6 .btn.headerlink:before,.rst-content p .btn.headerlink:before,.rst-content table>caption .btn.headerlink:before,.rst-content tt.download span.btn:first-child:before,.wy-menu-vertical li button.btn.toctree-expand:before{opacity:.5;-webkit-transition:opacity .05s ease-in;-moz-transition:opacity .05s ease-in;transition:opacity .05s 
ease-in}.btn.fa:hover:before,.btn.icon:hover:before,.rst-content .btn.admonition-title:hover:before,.rst-content .code-block-caption .btn.headerlink:hover:before,.rst-content .eqno .btn.headerlink:hover:before,.rst-content code.download span.btn:first-child:hover:before,.rst-content dl dt .btn.headerlink:hover:before,.rst-content h1 .btn.headerlink:hover:before,.rst-content h2 .btn.headerlink:hover:before,.rst-content h3 .btn.headerlink:hover:before,.rst-content h4 .btn.headerlink:hover:before,.rst-content h5 .btn.headerlink:hover:before,.rst-content h6 .btn.headerlink:hover:before,.rst-content p .btn.headerlink:hover:before,.rst-content table>caption .btn.headerlink:hover:before,.rst-content tt.download span.btn:first-child:hover:before,.wy-menu-vertical li button.btn.toctree-expand:hover:before{opacity:1}.btn-mini .fa:before,.btn-mini .icon:before,.btn-mini .rst-content .admonition-title:before,.btn-mini .rst-content .code-block-caption .headerlink:before,.btn-mini .rst-content .eqno .headerlink:before,.btn-mini .rst-content code.download span:first-child:before,.btn-mini .rst-content dl dt .headerlink:before,.btn-mini .rst-content h1 .headerlink:before,.btn-mini .rst-content h2 .headerlink:before,.btn-mini .rst-content h3 .headerlink:before,.btn-mini .rst-content h4 .headerlink:before,.btn-mini .rst-content h5 .headerlink:before,.btn-mini .rst-content h6 .headerlink:before,.btn-mini .rst-content p .headerlink:before,.btn-mini .rst-content table>caption .headerlink:before,.btn-mini .rst-content tt.download span:first-child:before,.btn-mini .wy-menu-vertical li button.toctree-expand:before,.rst-content .btn-mini .admonition-title:before,.rst-content .code-block-caption .btn-mini .headerlink:before,.rst-content .eqno .btn-mini .headerlink:before,.rst-content code.download .btn-mini span:first-child:before,.rst-content dl dt .btn-mini .headerlink:before,.rst-content h1 .btn-mini .headerlink:before,.rst-content h2 .btn-mini .headerlink:before,.rst-content h3 .btn-mini .headerlink:before,.rst-content h4 .btn-mini .headerlink:before,.rst-content h5 .btn-mini .headerlink:before,.rst-content h6 .btn-mini .headerlink:before,.rst-content p .btn-mini .headerlink:before,.rst-content table>caption .btn-mini .headerlink:before,.rst-content tt.download .btn-mini span:first-child:before,.wy-menu-vertical li .btn-mini button.toctree-expand:before{font-size:14px;vertical-align:-15%}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.wy-alert{padding:12px;line-height:24px;margin-bottom:24px;background:#e7f2fa}.rst-content .admonition-title,.wy-alert-title{font-weight:700;display:block;color:#fff;background:#6ab0de;padding:6px 12px;margin:-12px -12px 12px}.rst-content .danger,.rst-content .error,.rst-content .wy-alert-danger.admonition,.rst-content .wy-alert-danger.admonition-todo,.rst-content .wy-alert-danger.attention,.rst-content .wy-alert-danger.caution,.rst-content .wy-alert-danger.hint,.rst-content .wy-alert-danger.important,.rst-content .wy-alert-danger.note,.rst-content .wy-alert-danger.seealso,.rst-content .wy-alert-danger.tip,.rst-content .wy-alert-danger.warning,.wy-alert.wy-alert-danger{background:#fdf3f2}.rst-content .danger .admonition-title,.rst-content .danger .wy-alert-title,.rst-content .error .admonition-title,.rst-content .error .wy-alert-title,.rst-content 
.wy-alert-danger.admonition-todo .admonition-title,.rst-content .wy-alert-danger.admonition-todo .wy-alert-title,.rst-content .wy-alert-danger.admonition .admonition-title,.rst-content .wy-alert-danger.admonition .wy-alert-title,.rst-content .wy-alert-danger.attention .admonition-title,.rst-content .wy-alert-danger.attention .wy-alert-title,.rst-content .wy-alert-danger.caution .admonition-title,.rst-content .wy-alert-danger.caution .wy-alert-title,.rst-content .wy-alert-danger.hint .admonition-title,.rst-content .wy-alert-danger.hint .wy-alert-title,.rst-content .wy-alert-danger.important .admonition-title,.rst-content .wy-alert-danger.important .wy-alert-title,.rst-content .wy-alert-danger.note .admonition-title,.rst-content .wy-alert-danger.note .wy-alert-title,.rst-content .wy-alert-danger.seealso .admonition-title,.rst-content .wy-alert-danger.seealso .wy-alert-title,.rst-content .wy-alert-danger.tip .admonition-title,.rst-content .wy-alert-danger.tip .wy-alert-title,.rst-content .wy-alert-danger.warning .admonition-title,.rst-content .wy-alert-danger.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-danger .admonition-title,.wy-alert.wy-alert-danger .rst-content .admonition-title,.wy-alert.wy-alert-danger .wy-alert-title{background:#f29f97}.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .warning,.rst-content .wy-alert-warning.admonition,.rst-content .wy-alert-warning.danger,.rst-content .wy-alert-warning.error,.rst-content .wy-alert-warning.hint,.rst-content .wy-alert-warning.important,.rst-content .wy-alert-warning.note,.rst-content .wy-alert-warning.seealso,.rst-content .wy-alert-warning.tip,.wy-alert.wy-alert-warning{background:#ffedcc}.rst-content .admonition-todo .admonition-title,.rst-content .admonition-todo .wy-alert-title,.rst-content .attention .admonition-title,.rst-content .attention .wy-alert-title,.rst-content .caution .admonition-title,.rst-content .caution .wy-alert-title,.rst-content .warning .admonition-title,.rst-content .warning .wy-alert-title,.rst-content .wy-alert-warning.admonition .admonition-title,.rst-content .wy-alert-warning.admonition .wy-alert-title,.rst-content .wy-alert-warning.danger .admonition-title,.rst-content .wy-alert-warning.danger .wy-alert-title,.rst-content .wy-alert-warning.error .admonition-title,.rst-content .wy-alert-warning.error .wy-alert-title,.rst-content .wy-alert-warning.hint .admonition-title,.rst-content .wy-alert-warning.hint .wy-alert-title,.rst-content .wy-alert-warning.important .admonition-title,.rst-content .wy-alert-warning.important .wy-alert-title,.rst-content .wy-alert-warning.note .admonition-title,.rst-content .wy-alert-warning.note .wy-alert-title,.rst-content .wy-alert-warning.seealso .admonition-title,.rst-content .wy-alert-warning.seealso .wy-alert-title,.rst-content .wy-alert-warning.tip .admonition-title,.rst-content .wy-alert-warning.tip .wy-alert-title,.rst-content .wy-alert.wy-alert-warning .admonition-title,.wy-alert.wy-alert-warning .rst-content .admonition-title,.wy-alert.wy-alert-warning .wy-alert-title{background:#f0b37e}.rst-content .note,.rst-content .seealso,.rst-content .wy-alert-info.admonition,.rst-content .wy-alert-info.admonition-todo,.rst-content .wy-alert-info.attention,.rst-content .wy-alert-info.caution,.rst-content .wy-alert-info.danger,.rst-content .wy-alert-info.error,.rst-content .wy-alert-info.hint,.rst-content .wy-alert-info.important,.rst-content .wy-alert-info.tip,.rst-content 
.wy-alert-info.warning,.wy-alert.wy-alert-info{background:#e7f2fa}.rst-content .note .admonition-title,.rst-content .note .wy-alert-title,.rst-content .seealso .admonition-title,.rst-content .seealso .wy-alert-title,.rst-content .wy-alert-info.admonition-todo .admonition-title,.rst-content .wy-alert-info.admonition-todo .wy-alert-title,.rst-content .wy-alert-info.admonition .admonition-title,.rst-content .wy-alert-info.admonition .wy-alert-title,.rst-content .wy-alert-info.attention .admonition-title,.rst-content .wy-alert-info.attention .wy-alert-title,.rst-content .wy-alert-info.caution .admonition-title,.rst-content .wy-alert-info.caution .wy-alert-title,.rst-content .wy-alert-info.danger .admonition-title,.rst-content .wy-alert-info.danger .wy-alert-title,.rst-content .wy-alert-info.error .admonition-title,.rst-content .wy-alert-info.error .wy-alert-title,.rst-content .wy-alert-info.hint .admonition-title,.rst-content .wy-alert-info.hint .wy-alert-title,.rst-content .wy-alert-info.important .admonition-title,.rst-content .wy-alert-info.important .wy-alert-title,.rst-content .wy-alert-info.tip .admonition-title,.rst-content .wy-alert-info.tip .wy-alert-title,.rst-content .wy-alert-info.warning .admonition-title,.rst-content .wy-alert-info.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-info .admonition-title,.wy-alert.wy-alert-info .rst-content .admonition-title,.wy-alert.wy-alert-info .wy-alert-title{background:#6ab0de}.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .wy-alert-success.admonition,.rst-content .wy-alert-success.admonition-todo,.rst-content .wy-alert-success.attention,.rst-content .wy-alert-success.caution,.rst-content .wy-alert-success.danger,.rst-content .wy-alert-success.error,.rst-content .wy-alert-success.note,.rst-content .wy-alert-success.seealso,.rst-content .wy-alert-success.warning,.wy-alert.wy-alert-success{background:#dbfaf4}.rst-content .hint .admonition-title,.rst-content .hint .wy-alert-title,.rst-content .important .admonition-title,.rst-content .important .wy-alert-title,.rst-content .tip .admonition-title,.rst-content .tip .wy-alert-title,.rst-content .wy-alert-success.admonition-todo .admonition-title,.rst-content .wy-alert-success.admonition-todo .wy-alert-title,.rst-content .wy-alert-success.admonition .admonition-title,.rst-content .wy-alert-success.admonition .wy-alert-title,.rst-content .wy-alert-success.attention .admonition-title,.rst-content .wy-alert-success.attention .wy-alert-title,.rst-content .wy-alert-success.caution .admonition-title,.rst-content .wy-alert-success.caution .wy-alert-title,.rst-content .wy-alert-success.danger .admonition-title,.rst-content .wy-alert-success.danger .wy-alert-title,.rst-content .wy-alert-success.error .admonition-title,.rst-content .wy-alert-success.error .wy-alert-title,.rst-content .wy-alert-success.note .admonition-title,.rst-content .wy-alert-success.note .wy-alert-title,.rst-content .wy-alert-success.seealso .admonition-title,.rst-content .wy-alert-success.seealso .wy-alert-title,.rst-content .wy-alert-success.warning .admonition-title,.rst-content .wy-alert-success.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-success .admonition-title,.wy-alert.wy-alert-success .rst-content .admonition-title,.wy-alert.wy-alert-success .wy-alert-title{background:#1abc9c}.rst-content .wy-alert-neutral.admonition,.rst-content .wy-alert-neutral.admonition-todo,.rst-content .wy-alert-neutral.attention,.rst-content .wy-alert-neutral.caution,.rst-content 
.wy-alert-neutral.danger,.rst-content .wy-alert-neutral.error,.rst-content .wy-alert-neutral.hint,.rst-content .wy-alert-neutral.important,.rst-content .wy-alert-neutral.note,.rst-content .wy-alert-neutral.seealso,.rst-content .wy-alert-neutral.tip,.rst-content .wy-alert-neutral.warning,.wy-alert.wy-alert-neutral{background:#f3f6f6}.rst-content .wy-alert-neutral.admonition-todo .admonition-title,.rst-content .wy-alert-neutral.admonition-todo .wy-alert-title,.rst-content .wy-alert-neutral.admonition .admonition-title,.rst-content .wy-alert-neutral.admonition .wy-alert-title,.rst-content .wy-alert-neutral.attention .admonition-title,.rst-content .wy-alert-neutral.attention .wy-alert-title,.rst-content .wy-alert-neutral.caution .admonition-title,.rst-content .wy-alert-neutral.caution .wy-alert-title,.rst-content .wy-alert-neutral.danger .admonition-title,.rst-content .wy-alert-neutral.danger .wy-alert-title,.rst-content .wy-alert-neutral.error .admonition-title,.rst-content .wy-alert-neutral.error .wy-alert-title,.rst-content .wy-alert-neutral.hint .admonition-title,.rst-content .wy-alert-neutral.hint .wy-alert-title,.rst-content .wy-alert-neutral.important .admonition-title,.rst-content .wy-alert-neutral.important .wy-alert-title,.rst-content .wy-alert-neutral.note .admonition-title,.rst-content .wy-alert-neutral.note .wy-alert-title,.rst-content .wy-alert-neutral.seealso .admonition-title,.rst-content .wy-alert-neutral.seealso .wy-alert-title,.rst-content .wy-alert-neutral.tip .admonition-title,.rst-content .wy-alert-neutral.tip .wy-alert-title,.rst-content .wy-alert-neutral.warning .admonition-title,.rst-content .wy-alert-neutral.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-neutral .admonition-title,.wy-alert.wy-alert-neutral .rst-content .admonition-title,.wy-alert.wy-alert-neutral .wy-alert-title{color:#404040;background:#e1e4e5}.rst-content .wy-alert-neutral.admonition-todo a,.rst-content .wy-alert-neutral.admonition a,.rst-content .wy-alert-neutral.attention a,.rst-content .wy-alert-neutral.caution a,.rst-content .wy-alert-neutral.danger a,.rst-content .wy-alert-neutral.error a,.rst-content .wy-alert-neutral.hint a,.rst-content .wy-alert-neutral.important a,.rst-content .wy-alert-neutral.note a,.rst-content .wy-alert-neutral.seealso a,.rst-content .wy-alert-neutral.tip a,.rst-content .wy-alert-neutral.warning a,.wy-alert.wy-alert-neutral a{color:#2980b9}.rst-content .admonition-todo p:last-child,.rst-content .admonition p:last-child,.rst-content .attention p:last-child,.rst-content .caution p:last-child,.rst-content .danger p:last-child,.rst-content .error p:last-child,.rst-content .hint p:last-child,.rst-content .important p:last-child,.rst-content .note p:last-child,.rst-content .seealso p:last-child,.rst-content .tip p:last-child,.rst-content .warning p:last-child,.wy-alert p:last-child{margin-bottom:0}.wy-tray-container{position:fixed;bottom:0;left:0;z-index:600}.wy-tray-container li{display:block;width:300px;background:transparent;color:#fff;text-align:center;box-shadow:0 5px 5px 0 rgba(0,0,0,.1);padding:0 24px;min-width:20%;opacity:0;height:0;line-height:56px;overflow:hidden;-webkit-transition:all .3s ease-in;-moz-transition:all .3s ease-in;transition:all .3s ease-in}.wy-tray-container li.wy-tray-item-success{background:#27ae60}.wy-tray-container li.wy-tray-item-info{background:#2980b9}.wy-tray-container li.wy-tray-item-warning{background:#e67e22}.wy-tray-container li.wy-tray-item-danger{background:#e74c3c}.wy-tray-container li.on{opacity:1;height:56px}@media screen 
and (max-width:768px){.wy-tray-container{bottom:auto;top:0;width:100%}.wy-tray-container li{width:100%}}button{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle;cursor:pointer;line-height:normal;-webkit-appearance:button;*overflow:visible}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button[disabled]{cursor:default}.btn{display:inline-block;border-radius:2px;line-height:normal;white-space:nowrap;text-align:center;cursor:pointer;font-size:100%;padding:6px 12px 8px;color:#fff;border:1px solid rgba(0,0,0,.1);background-color:#27ae60;text-decoration:none;font-weight:400;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 2px -1px hsla(0,0%,100%,.5),inset 0 -2px 0 0 rgba(0,0,0,.1);outline-none:false;vertical-align:middle;*display:inline;zoom:1;-webkit-user-drag:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-transition:all .1s linear;-moz-transition:all .1s linear;transition:all .1s linear}.btn-hover{background:#2e8ece;color:#fff}.btn:hover{background:#2cc36b;color:#fff}.btn:focus{background:#2cc36b;outline:0}.btn:active{box-shadow:inset 0 -1px 0 0 rgba(0,0,0,.05),inset 0 2px 0 0 rgba(0,0,0,.1);padding:8px 12px 6px}.btn:visited{color:#fff}.btn-disabled,.btn-disabled:active,.btn-disabled:focus,.btn-disabled:hover,.btn:disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:.4;cursor:not-allowed;box-shadow:none}.btn::-moz-focus-inner{padding:0;border:0}.btn-small{font-size:80%}.btn-info{background-color:#2980b9!important}.btn-info:hover{background-color:#2e8ece!important}.btn-neutral{background-color:#f3f6f6!important;color:#404040!important}.btn-neutral:hover{background-color:#e5ebeb!important;color:#404040}.btn-neutral:visited{color:#404040!important}.btn-success{background-color:#27ae60!important}.btn-success:hover{background-color:#295!important}.btn-danger{background-color:#e74c3c!important}.btn-danger:hover{background-color:#ea6153!important}.btn-warning{background-color:#e67e22!important}.btn-warning:hover{background-color:#e98b39!important}.btn-invert{background-color:#222}.btn-invert:hover{background-color:#2f2f2f!important}.btn-link{background-color:transparent!important;color:#2980b9;box-shadow:none;border-color:transparent!important}.btn-link:active,.btn-link:hover{background-color:transparent!important;color:#409ad5!important;box-shadow:none}.btn-link:visited{color:#9b59b6}.wy-btn-group .btn,.wy-control .btn{vertical-align:middle}.wy-btn-group{margin-bottom:24px;*zoom:1}.wy-btn-group:after,.wy-btn-group:before{display:table;content:""}.wy-btn-group:after{clear:both}.wy-dropdown{position:relative;display:inline-block}.wy-dropdown-active .wy-dropdown-menu{display:block}.wy-dropdown-menu{position:absolute;left:0;display:none;float:left;top:100%;min-width:100%;background:#fcfcfc;z-index:100;border:1px solid #cfd7dd;box-shadow:0 2px 2px 0 rgba(0,0,0,.1);padding:12px}.wy-dropdown-menu>dd>a{display:block;clear:both;color:#404040;white-space:nowrap;font-size:90%;padding:0 12px;cursor:pointer}.wy-dropdown-menu>dd>a:hover{background:#2980b9;color:#fff}.wy-dropdown-menu>dd.divider{border-top:1px solid #cfd7dd;margin:6px 0}.wy-dropdown-menu>dd.search{padding-bottom:12px}.wy-dropdown-menu>dd.search 
input[type=search]{width:100%}.wy-dropdown-menu>dd.call-to-action{background:#e3e3e3;text-transform:uppercase;font-weight:500;font-size:80%}.wy-dropdown-menu>dd.call-to-action:hover{background:#e3e3e3}.wy-dropdown-menu>dd.call-to-action .btn{color:#fff}.wy-dropdown.wy-dropdown-up .wy-dropdown-menu{bottom:100%;top:auto;left:auto;right:0}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu{background:#fcfcfc;margin-top:2px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a{padding:6px 12px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a:hover{background:#2980b9;color:#fff}.wy-dropdown.wy-dropdown-left .wy-dropdown-menu{right:0;left:auto;text-align:right}.wy-dropdown-arrow:before{content:" ";border-bottom:5px solid #f5f5f5;border-left:5px solid transparent;border-right:5px solid transparent;position:absolute;display:block;top:-4px;left:50%;margin-left:-3px}.wy-dropdown-arrow.wy-dropdown-arrow-left:before{left:11px}.wy-form-stacked select{display:block}.wy-form-aligned .wy-help-inline,.wy-form-aligned input,.wy-form-aligned label,.wy-form-aligned select,.wy-form-aligned textarea{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-form-aligned .wy-control-group>label{display:inline-block;vertical-align:middle;width:10em;margin:6px 12px 0 0;float:left}.wy-form-aligned .wy-control{float:left}.wy-form-aligned .wy-control label{display:block}.wy-form-aligned .wy-control select{margin-top:6px}fieldset{margin:0}fieldset,legend{border:0;padding:0}legend{width:100%;white-space:normal;margin-bottom:24px;font-size:150%;*margin-left:-7px}label,legend{display:block}label{margin:0 0 .3125em;color:#333;font-size:90%}input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}.wy-control-group{margin-bottom:24px;max-width:1200px;margin-left:auto;margin-right:auto;*zoom:1}.wy-control-group:after,.wy-control-group:before{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group.wy-control-group-required>label:after{content:" *";color:#e74c3c}.wy-control-group .wy-form-full,.wy-control-group .wy-form-halves,.wy-control-group .wy-form-thirds{padding-bottom:12px}.wy-control-group .wy-form-full input[type=color],.wy-control-group .wy-form-full input[type=date],.wy-control-group .wy-form-full input[type=datetime-local],.wy-control-group .wy-form-full input[type=datetime],.wy-control-group .wy-form-full input[type=email],.wy-control-group .wy-form-full input[type=month],.wy-control-group .wy-form-full input[type=number],.wy-control-group .wy-form-full input[type=password],.wy-control-group .wy-form-full input[type=search],.wy-control-group .wy-form-full input[type=tel],.wy-control-group .wy-form-full input[type=text],.wy-control-group .wy-form-full input[type=time],.wy-control-group .wy-form-full input[type=url],.wy-control-group .wy-form-full input[type=week],.wy-control-group .wy-form-full select,.wy-control-group .wy-form-halves input[type=color],.wy-control-group .wy-form-halves input[type=date],.wy-control-group .wy-form-halves input[type=datetime-local],.wy-control-group .wy-form-halves input[type=datetime],.wy-control-group .wy-form-halves input[type=email],.wy-control-group .wy-form-halves input[type=month],.wy-control-group .wy-form-halves input[type=number],.wy-control-group .wy-form-halves input[type=password],.wy-control-group .wy-form-halves input[type=search],.wy-control-group .wy-form-halves input[type=tel],.wy-control-group .wy-form-halves input[type=text],.wy-control-group .wy-form-halves input[type=time],.wy-control-group 
.wy-form-halves input[type=url],.wy-control-group .wy-form-halves input[type=week],.wy-control-group .wy-form-halves select,.wy-control-group .wy-form-thirds input[type=color],.wy-control-group .wy-form-thirds input[type=date],.wy-control-group .wy-form-thirds input[type=datetime-local],.wy-control-group .wy-form-thirds input[type=datetime],.wy-control-group .wy-form-thirds input[type=email],.wy-control-group .wy-form-thirds input[type=month],.wy-control-group .wy-form-thirds input[type=number],.wy-control-group .wy-form-thirds input[type=password],.wy-control-group .wy-form-thirds input[type=search],.wy-control-group .wy-form-thirds input[type=tel],.wy-control-group .wy-form-thirds input[type=text],.wy-control-group .wy-form-thirds input[type=time],.wy-control-group .wy-form-thirds input[type=url],.wy-control-group .wy-form-thirds input[type=week],.wy-control-group .wy-form-thirds select{width:100%}.wy-control-group .wy-form-full{float:left;display:block;width:100%;margin-right:0}.wy-control-group .wy-form-full:last-child{margin-right:0}.wy-control-group .wy-form-halves{float:left;display:block;margin-right:2.35765%;width:48.82117%}.wy-control-group .wy-form-halves:last-child,.wy-control-group .wy-form-halves:nth-of-type(2n){margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(odd){clear:left}.wy-control-group .wy-form-thirds{float:left;display:block;margin-right:2.35765%;width:31.76157%}.wy-control-group .wy-form-thirds:last-child,.wy-control-group .wy-form-thirds:nth-of-type(3n){margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n+1){clear:left}.wy-control-group.wy-control-group-no-input .wy-control,.wy-control-no-input{margin:6px 0 0;font-size:90%}.wy-control-no-input{display:inline-block}.wy-control-group.fluid-input input[type=color],.wy-control-group.fluid-input input[type=date],.wy-control-group.fluid-input input[type=datetime-local],.wy-control-group.fluid-input input[type=datetime],.wy-control-group.fluid-input input[type=email],.wy-control-group.fluid-input input[type=month],.wy-control-group.fluid-input input[type=number],.wy-control-group.fluid-input input[type=password],.wy-control-group.fluid-input input[type=search],.wy-control-group.fluid-input input[type=tel],.wy-control-group.fluid-input input[type=text],.wy-control-group.fluid-input input[type=time],.wy-control-group.fluid-input input[type=url],.wy-control-group.fluid-input input[type=week]{width:100%}.wy-form-message-inline{padding-left:.3em;color:#666;font-size:90%}.wy-form-message{display:block;color:#999;font-size:70%;margin-top:.3125em;font-style:italic}.wy-form-message p{font-size:inherit;font-style:italic;margin-bottom:6px}.wy-form-message p:last-child{margin-bottom:0}input{line-height:normal}input[type=button],input[type=reset],input[type=submit]{-webkit-appearance:button;cursor:pointer;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;*overflow:visible}input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week]{-webkit-appearance:none;padding:6px;display:inline-block;border:1px solid #ccc;font-size:80%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 3px #ddd;border-radius:0;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}input[type=datetime-local]{padding:.34375em 
.625em}input[disabled]{cursor:default}input[type=checkbox],input[type=radio]{padding:0;margin-right:.3125em;*height:13px;*width:13px}input[type=checkbox],input[type=radio],input[type=search]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}input[type=search]::-webkit-search-cancel-button,input[type=search]::-webkit-search-decoration{-webkit-appearance:none}input[type=color]:focus,input[type=date]:focus,input[type=datetime-local]:focus,input[type=datetime]:focus,input[type=email]:focus,input[type=month]:focus,input[type=number]:focus,input[type=password]:focus,input[type=search]:focus,input[type=tel]:focus,input[type=text]:focus,input[type=time]:focus,input[type=url]:focus,input[type=week]:focus{outline:0;outline:thin dotted\9;border-color:#333}input.no-focus:focus{border-color:#ccc!important}input[type=checkbox]:focus,input[type=file]:focus,input[type=radio]:focus{outline:thin dotted #333;outline:1px auto #129fea}input[type=color][disabled],input[type=date][disabled],input[type=datetime-local][disabled],input[type=datetime][disabled],input[type=email][disabled],input[type=month][disabled],input[type=number][disabled],input[type=password][disabled],input[type=search][disabled],input[type=tel][disabled],input[type=text][disabled],input[type=time][disabled],input[type=url][disabled],input[type=week][disabled]{cursor:not-allowed;background-color:#fafafa}input:focus:invalid,select:focus:invalid,textarea:focus:invalid{color:#e74c3c;border:1px solid #e74c3c}input:focus:invalid:focus,select:focus:invalid:focus,textarea:focus:invalid:focus{border-color:#e74c3c}input[type=checkbox]:focus:invalid:focus,input[type=file]:focus:invalid:focus,input[type=radio]:focus:invalid:focus{outline-color:#e74c3c}input.wy-input-large{padding:12px;font-size:100%}textarea{overflow:auto;vertical-align:top;width:100%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif}select,textarea{padding:.5em .625em;display:inline-block;border:1px solid #ccc;font-size:80%;box-shadow:inset 0 1px 3px #ddd;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}select{border:1px solid #ccc;background-color:#fff}select[multiple]{height:auto}select:focus,textarea:focus{outline:0}input[readonly],select[disabled],select[readonly],textarea[disabled],textarea[readonly]{cursor:not-allowed;background-color:#fafafa}input[type=checkbox][disabled],input[type=radio][disabled]{cursor:not-allowed}.wy-checkbox,.wy-radio{margin:6px 0;color:#404040;display:block}.wy-checkbox input,.wy-radio input{vertical-align:baseline}.wy-form-message-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-input-prefix,.wy-input-suffix{white-space:nowrap;padding:6px}.wy-input-prefix .wy-input-context,.wy-input-suffix .wy-input-context{line-height:27px;padding:0 8px;display:inline-block;font-size:80%;background-color:#f3f6f6;border:1px solid #ccc;color:#999}.wy-input-suffix .wy-input-context{border-left:0}.wy-input-prefix .wy-input-context{border-right:0}.wy-switch{position:relative;display:block;height:24px;margin-top:12px;cursor:pointer}.wy-switch:before{left:0;top:0;width:36px;height:12px;background:#ccc}.wy-switch:after,.wy-switch:before{position:absolute;content:"";display:block;border-radius:4px;-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;transition:all .2s ease-in-out}.wy-switch:after{width:18px;height:18px;background:#999;left:-3px;top:-3px}.wy-switch 
span{position:absolute;left:48px;display:block;font-size:12px;color:#ccc;line-height:1}.wy-switch.active:before{background:#1e8449}.wy-switch.active:after{left:24px;background:#27ae60}.wy-switch.disabled{cursor:not-allowed;opacity:.8}.wy-control-group.wy-control-group-error .wy-form-message,.wy-control-group.wy-control-group-error>label{color:#e74c3c}.wy-control-group.wy-control-group-error input[type=color],.wy-control-group.wy-control-group-error input[type=date],.wy-control-group.wy-control-group-error input[type=datetime-local],.wy-control-group.wy-control-group-error input[type=datetime],.wy-control-group.wy-control-group-error input[type=email],.wy-control-group.wy-control-group-error input[type=month],.wy-control-group.wy-control-group-error input[type=number],.wy-control-group.wy-control-group-error input[type=password],.wy-control-group.wy-control-group-error input[type=search],.wy-control-group.wy-control-group-error input[type=tel],.wy-control-group.wy-control-group-error input[type=text],.wy-control-group.wy-control-group-error input[type=time],.wy-control-group.wy-control-group-error input[type=url],.wy-control-group.wy-control-group-error input[type=week],.wy-control-group.wy-control-group-error textarea{border:1px solid #e74c3c}.wy-inline-validate{white-space:nowrap}.wy-inline-validate .wy-input-context{padding:.5em .625em;display:inline-block;font-size:80%}.wy-inline-validate.wy-inline-validate-success .wy-input-context{color:#27ae60}.wy-inline-validate.wy-inline-validate-danger .wy-input-context{color:#e74c3c}.wy-inline-validate.wy-inline-validate-warning .wy-input-context{color:#e67e22}.wy-inline-validate.wy-inline-validate-info .wy-input-context{color:#2980b9}.rotate-90{-webkit-transform:rotate(90deg);-moz-transform:rotate(90deg);-ms-transform:rotate(90deg);-o-transform:rotate(90deg);transform:rotate(90deg)}.rotate-180{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}.rotate-270{-webkit-transform:rotate(270deg);-moz-transform:rotate(270deg);-ms-transform:rotate(270deg);-o-transform:rotate(270deg);transform:rotate(270deg)}.mirror{-webkit-transform:scaleX(-1);-moz-transform:scaleX(-1);-ms-transform:scaleX(-1);-o-transform:scaleX(-1);transform:scaleX(-1)}.mirror.rotate-90{-webkit-transform:scaleX(-1) rotate(90deg);-moz-transform:scaleX(-1) rotate(90deg);-ms-transform:scaleX(-1) rotate(90deg);-o-transform:scaleX(-1) rotate(90deg);transform:scaleX(-1) rotate(90deg)}.mirror.rotate-180{-webkit-transform:scaleX(-1) rotate(180deg);-moz-transform:scaleX(-1) rotate(180deg);-ms-transform:scaleX(-1) rotate(180deg);-o-transform:scaleX(-1) rotate(180deg);transform:scaleX(-1) rotate(180deg)}.mirror.rotate-270{-webkit-transform:scaleX(-1) rotate(270deg);-moz-transform:scaleX(-1) rotate(270deg);-ms-transform:scaleX(-1) rotate(270deg);-o-transform:scaleX(-1) rotate(270deg);transform:scaleX(-1) rotate(270deg)}@media only screen and (max-width:480px){.wy-form button[type=submit]{margin:.7em 0 0}.wy-form input[type=color],.wy-form input[type=date],.wy-form input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=text],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week],.wy-form label{margin-bottom:.3em;display:block}.wy-form input[type=color],.wy-form input[type=date],.wy-form 
input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week]{margin-bottom:0}.wy-form-aligned .wy-control-group label{margin-bottom:.3em;text-align:left;display:block;width:100%}.wy-form-aligned .wy-control{margin:1.5em 0 0}.wy-form-message,.wy-form-message-inline,.wy-form .wy-help-inline{display:block;font-size:80%;padding:6px 0}}@media screen and (max-width:768px){.tablet-hide{display:none}}@media screen and (max-width:480px){.mobile-hide{display:none}}.float-left{float:left}.float-right{float:right}.full-width{width:100%}.rst-content table.docutils,.rst-content table.field-list,.wy-table{border-collapse:collapse;border-spacing:0;empty-cells:show;margin-bottom:24px}.rst-content table.docutils caption,.rst-content table.field-list caption,.wy-table caption{color:#000;font:italic 85%/1 arial,sans-serif;padding:1em 0;text-align:center}.rst-content table.docutils td,.rst-content table.docutils th,.rst-content table.field-list td,.rst-content table.field-list th,.wy-table td,.wy-table th{font-size:90%;margin:0;overflow:visible;padding:8px 16px}.rst-content table.docutils td:first-child,.rst-content table.docutils th:first-child,.rst-content table.field-list td:first-child,.rst-content table.field-list th:first-child,.wy-table td:first-child,.wy-table th:first-child{border-left-width:0}.rst-content table.docutils thead,.rst-content table.field-list thead,.wy-table thead{color:#000;text-align:left;vertical-align:bottom;white-space:nowrap}.rst-content table.docutils thead th,.rst-content table.field-list thead th,.wy-table thead th{font-weight:700;border-bottom:2px solid #e1e4e5}.rst-content table.docutils td,.rst-content table.field-list td,.wy-table td{background-color:transparent;vertical-align:middle}.rst-content table.docutils td p,.rst-content table.field-list td p,.wy-table td p{line-height:18px}.rst-content table.docutils td p:last-child,.rst-content table.field-list td p:last-child,.wy-table td p:last-child{margin-bottom:0}.rst-content table.docutils .wy-table-cell-min,.rst-content table.field-list .wy-table-cell-min,.wy-table .wy-table-cell-min{width:1%;padding-right:0}.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox],.wy-table .wy-table-cell-min input[type=checkbox]{margin:0}.wy-table-secondary{color:grey;font-size:90%}.wy-table-tertiary{color:grey;font-size:80%}.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td,.wy-table-backed,.wy-table-odd td,.wy-table-striped tr:nth-child(2n-1) td{background-color:#f3f6f6}.rst-content table.docutils,.wy-table-bordered-all{border:1px solid #e1e4e5}.rst-content table.docutils td,.wy-table-bordered-all td{border-bottom:1px solid #e1e4e5;border-left:1px solid #e1e4e5}.rst-content table.docutils tbody>tr:last-child td,.wy-table-bordered-all tbody>tr:last-child td{border-bottom-width:0}.wy-table-bordered{border:1px solid #e1e4e5}.wy-table-bordered-rows td{border-bottom:1px solid #e1e4e5}.wy-table-bordered-rows tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal td,.wy-table-horizontal th{border-width:0 0 1px;border-bottom:1px solid #e1e4e5}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-responsive{margin-bottom:24px;max-width:100%;overflow:auto}.wy-table-responsive 
table{margin-bottom:0!important}.wy-table-responsive table td,.wy-table-responsive table th{white-space:nowrap}a{color:#2980b9;text-decoration:none;cursor:pointer}a:hover{color:#3091d1}a:visited{color:#9b59b6}html{height:100%}body,html{overflow-x:hidden}body{font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;font-weight:400;color:#404040;min-height:100%;background:#edf0f2}.wy-text-left{text-align:left}.wy-text-center{text-align:center}.wy-text-right{text-align:right}.wy-text-large{font-size:120%}.wy-text-normal{font-size:100%}.wy-text-small,small{font-size:80%}.wy-text-strike{text-decoration:line-through}.wy-text-warning{color:#e67e22!important}a.wy-text-warning:hover{color:#eb9950!important}.wy-text-info{color:#2980b9!important}a.wy-text-info:hover{color:#409ad5!important}.wy-text-success{color:#27ae60!important}a.wy-text-success:hover{color:#36d278!important}.wy-text-danger{color:#e74c3c!important}a.wy-text-danger:hover{color:#ed7669!important}.wy-text-neutral{color:#404040!important}a.wy-text-neutral:hover{color:#595959!important}.rst-content .toctree-wrapper>p.caption,h1,h2,h3,h4,h5,h6,legend{margin-top:0;font-weight:700;font-family:Roboto Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif}p{line-height:24px;font-size:16px;margin:0 0 24px}h1{font-size:175%}.rst-content .toctree-wrapper>p.caption,h2{font-size:150%}h3{font-size:125%}h4{font-size:115%}h5{font-size:110%}h6{font-size:100%}hr{display:block;height:1px;border:0;border-top:1px solid #e1e4e5;margin:24px 0;padding:0}.rst-content code,.rst-content tt,code{white-space:nowrap;max-width:100%;background:#fff;border:1px solid #e1e4e5;font-size:75%;padding:0 5px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#e74c3c;overflow-x:auto}.rst-content tt.code-large,code.code-large{font-size:90%}.rst-content .section ul,.rst-content .toctree-wrapper ul,.rst-content section ul,.wy-plain-list-disc,article ul{list-style:disc;line-height:24px;margin-bottom:24px}.rst-content .section ul li,.rst-content .toctree-wrapper ul li,.rst-content section ul li,.wy-plain-list-disc li,article ul li{list-style:disc;margin-left:24px}.rst-content .section ul li p:last-child,.rst-content .section ul li ul,.rst-content .toctree-wrapper ul li p:last-child,.rst-content .toctree-wrapper ul li ul,.rst-content section ul li p:last-child,.rst-content section ul li ul,.wy-plain-list-disc li p:last-child,.wy-plain-list-disc li ul,article ul li p:last-child,article ul li ul{margin-bottom:0}.rst-content .section ul li li,.rst-content .toctree-wrapper ul li li,.rst-content section ul li li,.wy-plain-list-disc li li,article ul li li{list-style:circle}.rst-content .section ul li li li,.rst-content .toctree-wrapper ul li li li,.rst-content section ul li li li,.wy-plain-list-disc li li li,article ul li li li{list-style:square}.rst-content .section ul li ol li,.rst-content .toctree-wrapper ul li ol li,.rst-content section ul li ol li,.wy-plain-list-disc li ol li,article ul li ol li{list-style:decimal}.rst-content .section ol,.rst-content .section ol.arabic,.rst-content .toctree-wrapper ol,.rst-content .toctree-wrapper ol.arabic,.rst-content section ol,.rst-content section ol.arabic,.wy-plain-list-decimal,article ol{list-style:decimal;line-height:24px;margin-bottom:24px}.rst-content .section ol.arabic li,.rst-content .section ol li,.rst-content .toctree-wrapper ol.arabic li,.rst-content .toctree-wrapper ol li,.rst-content section ol.arabic li,.rst-content section ol li,.wy-plain-list-decimal li,article ol 
li{list-style:decimal;margin-left:24px}.rst-content .section ol.arabic li ul,.rst-content .section ol li p:last-child,.rst-content .section ol li ul,.rst-content .toctree-wrapper ol.arabic li ul,.rst-content .toctree-wrapper ol li p:last-child,.rst-content .toctree-wrapper ol li ul,.rst-content section ol.arabic li ul,.rst-content section ol li p:last-child,.rst-content section ol li ul,.wy-plain-list-decimal li p:last-child,.wy-plain-list-decimal li ul,article ol li p:last-child,article ol li ul{margin-bottom:0}.rst-content .section ol.arabic li ul li,.rst-content .section ol li ul li,.rst-content .toctree-wrapper ol.arabic li ul li,.rst-content .toctree-wrapper ol li ul li,.rst-content section ol.arabic li ul li,.rst-content section ol li ul li,.wy-plain-list-decimal li ul li,article ol li ul li{list-style:disc}.wy-breadcrumbs{*zoom:1}.wy-breadcrumbs:after,.wy-breadcrumbs:before{display:table;content:""}.wy-breadcrumbs:after{clear:both}.wy-breadcrumbs>li{display:inline-block;padding-top:5px}.wy-breadcrumbs>li.wy-breadcrumbs-aside{float:right}.rst-content .wy-breadcrumbs>li code,.rst-content .wy-breadcrumbs>li tt,.wy-breadcrumbs>li .rst-content tt,.wy-breadcrumbs>li code{all:inherit;color:inherit}.breadcrumb-item:before{content:"/";color:#bbb;font-size:13px;padding:0 6px 0 3px}.wy-breadcrumbs-extra{margin-bottom:0;color:#b3b3b3;font-size:80%;display:inline-block}@media screen and (max-width:480px){.wy-breadcrumbs-extra,.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}@media print{.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}html{font-size:16px}.wy-affix{position:fixed;top:1.618em}.wy-menu a:hover{text-decoration:none}.wy-menu-horiz{*zoom:1}.wy-menu-horiz:after,.wy-menu-horiz:before{display:table;content:""}.wy-menu-horiz:after{clear:both}.wy-menu-horiz li,.wy-menu-horiz ul{display:inline-block}.wy-menu-horiz li:hover{background:hsla(0,0%,100%,.1)}.wy-menu-horiz li.divide-left{border-left:1px solid #404040}.wy-menu-horiz li.divide-right{border-right:1px solid #404040}.wy-menu-horiz a{height:32px;display:inline-block;line-height:32px;padding:0 16px}.wy-menu-vertical{width:300px}.wy-menu-vertical header,.wy-menu-vertical p.caption{color:#55a5d9;height:32px;line-height:32px;padding:0 1.618em;margin:12px 0 0;display:block;font-weight:700;text-transform:uppercase;font-size:85%;white-space:nowrap}.wy-menu-vertical ul{margin-bottom:0}.wy-menu-vertical li.divide-top{border-top:1px solid #404040}.wy-menu-vertical li.divide-bottom{border-bottom:1px solid #404040}.wy-menu-vertical li.current{background:#e3e3e3}.wy-menu-vertical li.current a{color:grey;border-right:1px solid #c9c9c9;padding:.4045em 2.427em}.wy-menu-vertical li.current a:hover{background:#d6d6d6}.rst-content .wy-menu-vertical li tt,.wy-menu-vertical li .rst-content tt,.wy-menu-vertical li code{border:none;background:inherit;color:inherit;padding-left:0;padding-right:0}.wy-menu-vertical li button.toctree-expand{display:block;float:left;margin-left:-1.2em;line-height:18px;color:#4d4d4d;border:none;background:none;padding:0}.wy-menu-vertical li.current>a,.wy-menu-vertical li.on a{color:#404040;font-weight:700;position:relative;background:#fcfcfc;border:none;padding:.4045em 1.618em}.wy-menu-vertical li.current>a:hover,.wy-menu-vertical li.on a:hover{background:#fcfcfc}.wy-menu-vertical li.current>a:hover button.toctree-expand,.wy-menu-vertical li.on a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a 
button.toctree-expand{display:block;line-height:18px;color:#333}.wy-menu-vertical li.toctree-l1.current>a{border-bottom:1px solid #c9c9c9;border-top:1px solid #c9c9c9}.wy-menu-vertical .toctree-l1.current .toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .toctree-l11>ul{display:none}.wy-menu-vertical .toctree-l1.current .current.toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .current.toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .current.toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .current.toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .current.toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .current.toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .current.toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .current.toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .current.toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .current.toctree-l11>ul{display:block}.wy-menu-vertical li.toctree-l3,.wy-menu-vertical li.toctree-l4{font-size:.9em}.wy-menu-vertical li.toctree-l2 a,.wy-menu-vertical li.toctree-l3 a,.wy-menu-vertical li.toctree-l4 a,.wy-menu-vertical li.toctree-l5 a,.wy-menu-vertical li.toctree-l6 a,.wy-menu-vertical li.toctree-l7 a,.wy-menu-vertical li.toctree-l8 a,.wy-menu-vertical li.toctree-l9 a,.wy-menu-vertical li.toctree-l10 a{color:#404040}.wy-menu-vertical li.toctree-l2 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l3 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l4 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l5 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l6 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l7 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l8 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l9 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l10 a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a,.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a,.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a,.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a,.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a,.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a,.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a,.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{display:block}.wy-menu-vertical li.toctree-l2.current>a{padding:.4045em 2.427em}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{padding:.4045em 1.618em .4045em 4.045em}.wy-menu-vertical li.toctree-l3.current>a{padding:.4045em 4.045em}.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{padding:.4045em 1.618em .4045em 5.663em}.wy-menu-vertical li.toctree-l4.current>a{padding:.4045em 5.663em}.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a{padding:.4045em 1.618em .4045em 7.281em}.wy-menu-vertical li.toctree-l5.current>a{padding:.4045em 7.281em}.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a{padding:.4045em 1.618em .4045em 8.899em}.wy-menu-vertical li.toctree-l6.current>a{padding:.4045em 
8.899em}.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a{padding:.4045em 1.618em .4045em 10.517em}.wy-menu-vertical li.toctree-l7.current>a{padding:.4045em 10.517em}.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a{padding:.4045em 1.618em .4045em 12.135em}.wy-menu-vertical li.toctree-l8.current>a{padding:.4045em 12.135em}.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a{padding:.4045em 1.618em .4045em 13.753em}.wy-menu-vertical li.toctree-l9.current>a{padding:.4045em 13.753em}.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a{padding:.4045em 1.618em .4045em 15.371em}.wy-menu-vertical li.toctree-l10.current>a{padding:.4045em 15.371em}.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{padding:.4045em 1.618em .4045em 16.989em}.wy-menu-vertical li.toctree-l2.current>a,.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{background:#c9c9c9}.wy-menu-vertical li.toctree-l2 button.toctree-expand{color:#a3a3a3}.wy-menu-vertical li.toctree-l3.current>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{background:#bdbdbd}.wy-menu-vertical li.toctree-l3 button.toctree-expand{color:#969696}.wy-menu-vertical li.current ul{display:block}.wy-menu-vertical li ul{margin-bottom:0;display:none}.wy-menu-vertical li ul li a{margin-bottom:0;color:#d9d9d9;font-weight:400}.wy-menu-vertical a{line-height:18px;padding:.4045em 1.618em;display:block;position:relative;font-size:90%;color:#d9d9d9}.wy-menu-vertical a:hover{background-color:#4e4a4a;cursor:pointer}.wy-menu-vertical a:hover button.toctree-expand{color:#d9d9d9}.wy-menu-vertical a:active{background-color:#2980b9;cursor:pointer;color:#fff}.wy-menu-vertical a:active button.toctree-expand{color:#fff}.wy-side-nav-search{display:block;width:300px;padding:.809em;margin-bottom:.809em;z-index:200;background-color:#2980b9;text-align:center;color:#fcfcfc}.wy-side-nav-search input[type=text]{width:100%;border-radius:50px;padding:6px 12px;border-color:#2472a4}.wy-side-nav-search img{display:block;margin:auto auto .809em;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-side-nav-search .wy-dropdown>a,.wy-side-nav-search>a{color:#fcfcfc;font-size:100%;font-weight:700;display:inline-block;padding:4px 6px;margin-bottom:.809em;max-width:100%}.wy-side-nav-search .wy-dropdown>a:hover,.wy-side-nav-search .wy-dropdown>aactive,.wy-side-nav-search .wy-dropdown>afocus,.wy-side-nav-search>a:hover,.wy-side-nav-search>aactive,.wy-side-nav-search>afocus{background:hsla(0,0%,100%,.1)}.wy-side-nav-search .wy-dropdown>a img.logo,.wy-side-nav-search>a img.logo{display:block;margin:0 auto;height:auto;width:auto;border-radius:0;max-width:100%;background:transparent}.wy-side-nav-search .wy-dropdown>a.icon,.wy-side-nav-search>a.icon{display:block}.wy-side-nav-search .wy-dropdown>a.icon img.logo,.wy-side-nav-search>a.icon img.logo{margin-top:.85em}.wy-side-nav-search>div.switch-menus{position:relative;display:block;margin-top:-.4045em;margin-bottom:.809em;font-weight:400;color:hsla(0,0%,100%,.3)}.wy-side-nav-search>div.switch-menus>div.language-switch,.wy-side-nav-search>div.switch-menus>div.version-switch{display:inline-block;padding:.2em}.wy-side-nav-search>div.switch-menus>div.language-switch select,.wy-side-nav-search>div.switch-menus>div.version-switch select{display:inline-block;margin-right:-2rem;padding-right:2rem;max-width:240px;text-align-last:center;background:none;border:none;border-radius:0;box-shadow:none;font-family:Lato,proxima-nova,Helvetica 
Neue,Arial,sans-serif;font-size:1em;font-weight:400;color:hsla(0,0%,100%,.3);cursor:pointer;appearance:none;-webkit-appearance:none;-moz-appearance:none}.wy-side-nav-search>div.switch-menus>div.language-switch select:active,.wy-side-nav-search>div.switch-menus>div.language-switch select:focus,.wy-side-nav-search>div.switch-menus>div.language-switch select:hover,.wy-side-nav-search>div.switch-menus>div.version-switch select:active,.wy-side-nav-search>div.switch-menus>div.version-switch select:focus,.wy-side-nav-search>div.switch-menus>div.version-switch select:hover{background:hsla(0,0%,100%,.1);color:hsla(0,0%,100%,.5)}.wy-side-nav-search>div.switch-menus>div.language-switch select option,.wy-side-nav-search>div.switch-menus>div.version-switch select option{color:#000}.wy-side-nav-search>div.switch-menus>div.language-switch:has(>select):after,.wy-side-nav-search>div.switch-menus>div.version-switch:has(>select):after{display:inline-block;width:1.5em;height:100%;padding:.1em;content:"\f0d7";font-size:1em;line-height:1.2em;font-family:FontAwesome;text-align:center;pointer-events:none;box-sizing:border-box}.wy-nav .wy-menu-vertical header{color:#2980b9}.wy-nav .wy-menu-vertical a{color:#b3b3b3}.wy-nav .wy-menu-vertical a:hover{background-color:#2980b9;color:#fff}[data-menu-wrap]{-webkit-transition:all .2s ease-in;-moz-transition:all .2s ease-in;transition:all .2s ease-in;position:absolute;opacity:1;width:100%;opacity:0}[data-menu-wrap].move-center{left:0;right:auto;opacity:1}[data-menu-wrap].move-left{right:auto;left:-100%;opacity:0}[data-menu-wrap].move-right{right:-100%;left:auto;opacity:0}.wy-body-for-nav{background:#fcfcfc}.wy-grid-for-nav{position:absolute;width:100%;height:100%}.wy-nav-side{position:fixed;top:0;bottom:0;left:0;padding-bottom:2em;width:300px;overflow-x:hidden;overflow-y:hidden;min-height:100%;color:#9b9b9b;background:#343131;z-index:200}.wy-side-scroll{width:320px;position:relative;overflow-x:hidden;overflow-y:scroll;height:100%}.wy-nav-top{display:none;background:#2980b9;color:#fff;padding:.4045em .809em;position:relative;line-height:50px;text-align:center;font-size:100%;*zoom:1}.wy-nav-top:after,.wy-nav-top:before{display:table;content:""}.wy-nav-top:after{clear:both}.wy-nav-top a{color:#fff;font-weight:700}.wy-nav-top img{margin-right:12px;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-nav-top i{font-size:30px;float:left;cursor:pointer;padding-top:inherit}.wy-nav-content-wrap{margin-left:300px;background:#fcfcfc;min-height:100%}.wy-nav-content{padding:1.618em 3.236em;height:100%;max-width:800px;margin:auto}.wy-body-mask{position:fixed;width:100%;height:100%;background:rgba(0,0,0,.2);display:none;z-index:499}.wy-body-mask.on{display:block}footer{color:grey}footer p{margin-bottom:12px}.rst-content footer span.commit tt,footer span.commit .rst-content tt,footer span.commit code{padding:0;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:1em;background:none;border:none;color:grey}.rst-footer-buttons{*zoom:1}.rst-footer-buttons:after,.rst-footer-buttons:before{width:100%;display:table;content:""}.rst-footer-buttons:after{clear:both}.rst-breadcrumbs-buttons{margin-top:12px;*zoom:1}.rst-breadcrumbs-buttons:after,.rst-breadcrumbs-buttons:before{display:table;content:""}.rst-breadcrumbs-buttons:after{clear:both}#search-results .search li{margin-bottom:24px;border-bottom:1px solid #e1e4e5;padding-bottom:24px}#search-results .search li:first-child{border-top:1px solid 
#e1e4e5;padding-top:24px}#search-results .search li a{font-size:120%;margin-bottom:12px;display:inline-block}#search-results .context{color:grey;font-size:90%}.genindextable li>ul{margin-left:24px}@media screen and (max-width:768px){.wy-body-for-nav{background:#fcfcfc}.wy-nav-top{display:block}.wy-nav-side{left:-300px}.wy-nav-side.shift{width:85%;left:0}.wy-menu.wy-menu-vertical,.wy-side-nav-search,.wy-side-scroll{width:auto}.wy-nav-content-wrap{margin-left:0}.wy-nav-content-wrap .wy-nav-content{padding:1.618em}.wy-nav-content-wrap.shift{position:fixed;min-width:100%;left:85%;top:0;height:100%;overflow:hidden}}@media screen and (min-width:1100px){.wy-nav-content-wrap{background:rgba(0,0,0,.05)}.wy-nav-content{margin:0;background:#fcfcfc}}@media print{.rst-versions,.wy-nav-side,footer{display:none}.wy-nav-content-wrap{margin-left:0}}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60;*zoom:1}.rst-versions .rst-current-version:after,.rst-versions .rst-current-version:before{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-content .code-block-caption .rst-versions .rst-current-version .headerlink,.rst-content .eqno .rst-versions .rst-current-version .headerlink,.rst-content .rst-versions .rst-current-version .admonition-title,.rst-content code.download .rst-versions .rst-current-version span:first-child,.rst-content dl dt .rst-versions .rst-current-version .headerlink,.rst-content h1 .rst-versions .rst-current-version .headerlink,.rst-content h2 .rst-versions .rst-current-version .headerlink,.rst-content h3 .rst-versions .rst-current-version .headerlink,.rst-content h4 .rst-versions .rst-current-version .headerlink,.rst-content h5 .rst-versions .rst-current-version .headerlink,.rst-content h6 .rst-versions .rst-current-version .headerlink,.rst-content p .rst-versions .rst-current-version .headerlink,.rst-content table>caption .rst-versions .rst-current-version .headerlink,.rst-content tt.download .rst-versions .rst-current-version span:first-child,.rst-versions .rst-current-version .fa,.rst-versions .rst-current-version .icon,.rst-versions .rst-current-version .rst-content .admonition-title,.rst-versions .rst-current-version .rst-content .code-block-caption .headerlink,.rst-versions .rst-current-version .rst-content .eqno .headerlink,.rst-versions .rst-current-version .rst-content code.download span:first-child,.rst-versions .rst-current-version .rst-content dl dt .headerlink,.rst-versions .rst-current-version .rst-content h1 .headerlink,.rst-versions .rst-current-version .rst-content h2 .headerlink,.rst-versions .rst-current-version .rst-content h3 .headerlink,.rst-versions .rst-current-version .rst-content h4 .headerlink,.rst-versions .rst-current-version .rst-content h5 .headerlink,.rst-versions .rst-current-version .rst-content h6 .headerlink,.rst-versions .rst-current-version .rst-content p .headerlink,.rst-versions .rst-current-version .rst-content table>caption .headerlink,.rst-versions .rst-current-version .rst-content tt.download span:first-child,.rst-versions .rst-current-version .wy-menu-vertical li button.toctree-expand,.wy-menu-vertical li .rst-versions .rst-current-version 
button.toctree-expand{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions .rst-other-versions .rtd-current-item{font-weight:700}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}#flyout-search-form{padding:6px}.rst-content .toctree-wrapper>p.caption,.rst-content h1,.rst-content h2,.rst-content h3,.rst-content h4,.rst-content h5,.rst-content h6{margin-bottom:24px}.rst-content img{max-width:100%;height:auto}.rst-content div.figure,.rst-content figure{margin-bottom:24px}.rst-content div.figure .caption-text,.rst-content figure .caption-text{font-style:italic}.rst-content div.figure p:last-child.caption,.rst-content figure p:last-child.caption{margin-bottom:0}.rst-content div.figure.align-center,.rst-content figure.align-center{text-align:center}.rst-content .section>a>img,.rst-content .section>img,.rst-content section>a>img,.rst-content section>img{margin-bottom:24px}.rst-content abbr[title]{text-decoration:none}.rst-content.style-external-links a.reference.external:after{font-family:FontAwesome;content:"\f08e";color:#b3b3b3;vertical-align:super;font-size:60%;margin:0 .2em}.rst-content blockquote{margin-left:24px;line-height:24px;margin-bottom:24px}.rst-content pre.literal-block{white-space:pre;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;display:block;overflow:auto}.rst-content div[class^=highlight],.rst-content pre.literal-block{border:1px solid #e1e4e5;overflow-x:auto;margin:1px 0 24px}.rst-content div[class^=highlight] div[class^=highlight],.rst-content pre.literal-block div[class^=highlight]{padding:0;border:none;margin:0}.rst-content div[class^=highlight] td.code{width:100%}.rst-content .linenodiv pre{border-right:1px solid #e6e9ea;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;user-select:none;pointer-events:none}.rst-content div[class^=highlight] pre{white-space:pre;margin:0;padding:12px;display:block;overflow:auto}.rst-content div[class^=highlight] pre .hll{display:block;margin:0 -12px;padding:0 12px}.rst-content .linenodiv pre,.rst-content div[class^=highlight] pre,.rst-content 
pre.literal-block{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:12px;line-height:1.4}.rst-content div.highlight .gp,.rst-content div.highlight span.linenos{user-select:none;pointer-events:none}.rst-content div.highlight span.linenos{display:inline-block;padding-left:0;padding-right:12px;margin-right:12px;border-right:1px solid #e6e9ea}.rst-content .code-block-caption{font-style:italic;font-size:85%;line-height:1;padding:1em 0;text-align:center}@media print{.rst-content .codeblock,.rst-content div[class^=highlight],.rst-content div[class^=highlight] pre{white-space:pre-wrap}}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning{clear:both}.rst-content .admonition-todo .last,.rst-content .admonition-todo>:last-child,.rst-content .admonition .last,.rst-content .admonition>:last-child,.rst-content .attention .last,.rst-content .attention>:last-child,.rst-content .caution .last,.rst-content .caution>:last-child,.rst-content .danger .last,.rst-content .danger>:last-child,.rst-content .error .last,.rst-content .error>:last-child,.rst-content .hint .last,.rst-content .hint>:last-child,.rst-content .important .last,.rst-content .important>:last-child,.rst-content .note .last,.rst-content .note>:last-child,.rst-content .seealso .last,.rst-content .seealso>:last-child,.rst-content .tip .last,.rst-content .tip>:last-child,.rst-content .warning .last,.rst-content .warning>:last-child{margin-bottom:0}.rst-content .admonition-title:before{margin-right:4px}.rst-content .admonition table{border-color:rgba(0,0,0,.1)}.rst-content .admonition table td,.rst-content .admonition table th{background:transparent!important;border-color:rgba(0,0,0,.1)!important}.rst-content .section ol.loweralpha,.rst-content .section ol.loweralpha>li,.rst-content .toctree-wrapper ol.loweralpha,.rst-content .toctree-wrapper ol.loweralpha>li,.rst-content section ol.loweralpha,.rst-content section ol.loweralpha>li{list-style:lower-alpha}.rst-content .section ol.upperalpha,.rst-content .section ol.upperalpha>li,.rst-content .toctree-wrapper ol.upperalpha,.rst-content .toctree-wrapper ol.upperalpha>li,.rst-content section ol.upperalpha,.rst-content section ol.upperalpha>li{list-style:upper-alpha}.rst-content .section ol li>*,.rst-content .section ul li>*,.rst-content .toctree-wrapper ol li>*,.rst-content .toctree-wrapper ul li>*,.rst-content section ol li>*,.rst-content section ul li>*{margin-top:12px;margin-bottom:12px}.rst-content .section ol li>:first-child,.rst-content .section ul li>:first-child,.rst-content .toctree-wrapper ol li>:first-child,.rst-content .toctree-wrapper ul li>:first-child,.rst-content section ol li>:first-child,.rst-content section ul li>:first-child{margin-top:0}.rst-content .section ol li>p,.rst-content .section ol li>p:last-child,.rst-content .section ul li>p,.rst-content .section ul li>p:last-child,.rst-content .toctree-wrapper ol li>p,.rst-content .toctree-wrapper ol li>p:last-child,.rst-content .toctree-wrapper ul li>p,.rst-content .toctree-wrapper ul li>p:last-child,.rst-content section ol li>p,.rst-content section ol li>p:last-child,.rst-content section ul li>p,.rst-content section ul li>p:last-child{margin-bottom:12px}.rst-content .section ol li>p:only-child,.rst-content .section ol li>p:only-child:last-child,.rst-content .section ul 
li>p:only-child,.rst-content .section ul li>p:only-child:last-child,.rst-content .toctree-wrapper ol li>p:only-child,.rst-content .toctree-wrapper ol li>p:only-child:last-child,.rst-content .toctree-wrapper ul li>p:only-child,.rst-content .toctree-wrapper ul li>p:only-child:last-child,.rst-content section ol li>p:only-child,.rst-content section ol li>p:only-child:last-child,.rst-content section ul li>p:only-child,.rst-content section ul li>p:only-child:last-child{margin-bottom:0}.rst-content .section ol li>ol,.rst-content .section ol li>ul,.rst-content .section ul li>ol,.rst-content .section ul li>ul,.rst-content .toctree-wrapper ol li>ol,.rst-content .toctree-wrapper ol li>ul,.rst-content .toctree-wrapper ul li>ol,.rst-content .toctree-wrapper ul li>ul,.rst-content section ol li>ol,.rst-content section ol li>ul,.rst-content section ul li>ol,.rst-content section ul li>ul{margin-bottom:12px}.rst-content .section ol.simple li>*,.rst-content .section ol.simple li ol,.rst-content .section ol.simple li ul,.rst-content .section ul.simple li>*,.rst-content .section ul.simple li ol,.rst-content .section ul.simple li ul,.rst-content .toctree-wrapper ol.simple li>*,.rst-content .toctree-wrapper ol.simple li ol,.rst-content .toctree-wrapper ol.simple li ul,.rst-content .toctree-wrapper ul.simple li>*,.rst-content .toctree-wrapper ul.simple li ol,.rst-content .toctree-wrapper ul.simple li ul,.rst-content section ol.simple li>*,.rst-content section ol.simple li ol,.rst-content section ol.simple li ul,.rst-content section ul.simple li>*,.rst-content section ul.simple li ol,.rst-content section ul.simple li ul{margin-top:0;margin-bottom:0}.rst-content .line-block{margin-left:0;margin-bottom:24px;line-height:24px}.rst-content .line-block .line-block{margin-left:24px;margin-bottom:0}.rst-content .topic-title{font-weight:700;margin-bottom:12px}.rst-content .toc-backref{color:#404040}.rst-content .align-right{float:right;margin:0 0 24px 24px}.rst-content .align-left{float:left;margin:0 24px 24px 0}.rst-content .align-center{margin:auto}.rst-content .align-center:not(table){display:block}.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink{opacity:0;font-size:14px;font-family:FontAwesome;margin-left:.5em}.rst-content .code-block-caption .headerlink:focus,.rst-content .code-block-caption:hover .headerlink,.rst-content .eqno .headerlink:focus,.rst-content .eqno:hover .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink:focus,.rst-content .toctree-wrapper>p.caption:hover .headerlink,.rst-content dl dt .headerlink:focus,.rst-content dl dt:hover .headerlink,.rst-content h1 .headerlink:focus,.rst-content h1:hover .headerlink,.rst-content h2 .headerlink:focus,.rst-content h2:hover .headerlink,.rst-content h3 .headerlink:focus,.rst-content h3:hover .headerlink,.rst-content h4 .headerlink:focus,.rst-content h4:hover .headerlink,.rst-content h5 .headerlink:focus,.rst-content h5:hover .headerlink,.rst-content h6 .headerlink:focus,.rst-content h6:hover .headerlink,.rst-content p.caption .headerlink:focus,.rst-content p.caption:hover .headerlink,.rst-content p .headerlink:focus,.rst-content p:hover .headerlink,.rst-content table>caption 
.headerlink:focus,.rst-content table>caption:hover .headerlink{opacity:1}.rst-content p a{overflow-wrap:anywhere}.rst-content .wy-table td p,.rst-content .wy-table td ul,.rst-content .wy-table th p,.rst-content .wy-table th ul,.rst-content table.docutils td p,.rst-content table.docutils td ul,.rst-content table.docutils th p,.rst-content table.docutils th ul,.rst-content table.field-list td p,.rst-content table.field-list td ul,.rst-content table.field-list th p,.rst-content table.field-list th ul{font-size:inherit}.rst-content .btn:focus{outline:2px solid}.rst-content table>caption .headerlink:after{font-size:12px}.rst-content .centered{text-align:center}.rst-content .sidebar{float:right;width:40%;display:block;margin:0 0 24px 24px;padding:24px;background:#f3f6f6;border:1px solid #e1e4e5}.rst-content .sidebar dl,.rst-content .sidebar p,.rst-content .sidebar ul{font-size:90%}.rst-content .sidebar .last,.rst-content .sidebar>:last-child{margin-bottom:0}.rst-content .sidebar .sidebar-title{display:block;font-family:Roboto Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif;font-weight:700;background:#e1e4e5;padding:6px 12px;margin:-24px -24px 24px;font-size:100%}.rst-content .highlighted{background:#f1c40f;box-shadow:0 0 0 2px #f1c40f;display:inline;font-weight:700}.rst-content .citation-reference,.rst-content .footnote-reference{vertical-align:baseline;position:relative;top:-.4em;line-height:0;font-size:90%}.rst-content .citation-reference>span.fn-bracket,.rst-content .footnote-reference>span.fn-bracket{display:none}.rst-content .hlist{width:100%}.rst-content dl dt span.classifier:before{content:" : "}.rst-content dl dt span.classifier-delimiter{display:none!important}html.writer-html4 .rst-content table.docutils.citation,html.writer-html4 .rst-content table.docutils.footnote{background:none;border:none}html.writer-html4 .rst-content table.docutils.citation td,html.writer-html4 .rst-content table.docutils.citation tr,html.writer-html4 .rst-content table.docutils.footnote td,html.writer-html4 .rst-content table.docutils.footnote tr{border:none;background-color:transparent!important;white-space:normal}html.writer-html4 .rst-content table.docutils.citation td.label,html.writer-html4 .rst-content table.docutils.footnote td.label{padding-left:0;padding-right:0;vertical-align:top}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{display:grid;grid-template-columns:auto minmax(80%,95%)}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{display:inline-grid;grid-template-columns:max-content auto}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{display:grid;grid-template-columns:auto auto minmax(.65rem,auto) minmax(40%,95%)}html.writer-html5 .rst-content aside.citation>span.label,html.writer-html5 .rst-content aside.footnote>span.label,html.writer-html5 .rst-content div.citation>span.label{grid-column-start:1;grid-column-end:2}html.writer-html5 .rst-content aside.citation>span.backrefs,html.writer-html5 .rst-content aside.footnote>span.backrefs,html.writer-html5 .rst-content div.citation>span.backrefs{grid-column-start:2;grid-column-end:3;grid-row-start:1;grid-row-end:3}html.writer-html5 .rst-content aside.citation>p,html.writer-html5 .rst-content aside.footnote>p,html.writer-html5 .rst-content 
div.citation>p{grid-column-start:4;grid-column-end:5}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{margin-bottom:24px}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{padding-left:1rem}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dd,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dd,html.writer-html5 .rst-content dl.footnote>dt{margin-bottom:0}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{font-size:.9rem}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.footnote>dt{margin:0 .5rem .5rem 0;line-height:1.2rem;word-break:break-all;font-weight:400}html.writer-html5 .rst-content dl.citation>dt>span.brackets:before,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:before{content:"["}html.writer-html5 .rst-content dl.citation>dt>span.brackets:after,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:after{content:"]"}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref>a{word-break:keep-all}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a:not(:first-child):before,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.footnote>dd{margin:0 0 .5rem;line-height:1.2rem}html.writer-html5 .rst-content dl.citation>dd p,html.writer-html5 .rst-content dl.footnote>dd p{font-size:.9rem}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{padding-left:1rem;padding-right:1rem;font-size:.9rem;line-height:1.2rem}html.writer-html5 .rst-content aside.citation p,html.writer-html5 .rst-content aside.footnote p,html.writer-html5 .rst-content div.citation p{font-size:.9rem;line-height:1.2rem;margin-bottom:12px}html.writer-html5 .rst-content aside.citation span.backrefs,html.writer-html5 .rst-content aside.footnote span.backrefs,html.writer-html5 .rst-content div.citation span.backrefs{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content aside.citation span.backrefs>a,html.writer-html5 .rst-content aside.footnote span.backrefs>a,html.writer-html5 .rst-content div.citation span.backrefs>a{word-break:keep-all}html.writer-html5 .rst-content aside.citation span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content aside.footnote span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content div.citation span.backrefs>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content aside.citation span.label,html.writer-html5 .rst-content aside.footnote span.label,html.writer-html5 .rst-content div.citation span.label{line-height:1.2rem}html.writer-html5 .rst-content aside.citation-list,html.writer-html5 .rst-content aside.footnote-list,html.writer-html5 .rst-content 
div.citation-list{margin-bottom:24px}html.writer-html5 .rst-content dl.option-list kbd{font-size:.9rem}.rst-content table.docutils.footnote,html.writer-html4 .rst-content table.docutils.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content aside.footnote-list aside.footnote,html.writer-html5 .rst-content div.citation-list>div.citation,html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{color:grey}.rst-content table.docutils.footnote code,.rst-content table.docutils.footnote tt,html.writer-html4 .rst-content table.docutils.citation code,html.writer-html4 .rst-content table.docutils.citation tt,html.writer-html5 .rst-content aside.footnote-list aside.footnote code,html.writer-html5 .rst-content aside.footnote-list aside.footnote tt,html.writer-html5 .rst-content aside.footnote code,html.writer-html5 .rst-content aside.footnote tt,html.writer-html5 .rst-content div.citation-list>div.citation code,html.writer-html5 .rst-content div.citation-list>div.citation tt,html.writer-html5 .rst-content dl.citation code,html.writer-html5 .rst-content dl.citation tt,html.writer-html5 .rst-content dl.footnote code,html.writer-html5 .rst-content dl.footnote tt{color:#555}.rst-content .wy-table-responsive.citation,.rst-content .wy-table-responsive.footnote{margin-bottom:0}.rst-content .wy-table-responsive.citation+:not(.citation),.rst-content .wy-table-responsive.footnote+:not(.footnote){margin-top:24px}.rst-content .wy-table-responsive.citation:last-child,.rst-content .wy-table-responsive.footnote:last-child{margin-bottom:24px}.rst-content table.docutils th{border-color:#e1e4e5}html.writer-html5 .rst-content table.docutils th{border:1px solid #e1e4e5}html.writer-html5 .rst-content table.docutils td>p,html.writer-html5 .rst-content table.docutils th>p{line-height:1rem;margin-bottom:0;font-size:.9rem}.rst-content table.docutils td .last,.rst-content table.docutils td .last>:last-child{margin-bottom:0}.rst-content table.field-list,.rst-content table.field-list td{border:none}.rst-content table.field-list td p{line-height:inherit}.rst-content table.field-list td>strong{display:inline-block}.rst-content table.field-list .field-name{padding-right:10px;text-align:left;white-space:nowrap}.rst-content table.field-list .field-body{text-align:left}.rst-content code,.rst-content tt{color:#000;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;padding:2px 5px}.rst-content code big,.rst-content code em,.rst-content tt big,.rst-content tt em{font-size:100%!important;line-height:normal}.rst-content code.literal,.rst-content tt.literal{color:#e74c3c;white-space:normal}.rst-content code.xref,.rst-content tt.xref,a .rst-content code,a .rst-content tt{font-weight:700;color:#404040;overflow-wrap:normal}.rst-content kbd,.rst-content pre,.rst-content samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace}.rst-content a code,.rst-content a tt{color:#2980b9}.rst-content dl{margin-bottom:24px}.rst-content dl dt{font-weight:700;margin-bottom:12px}.rst-content dl ol,.rst-content dl p,.rst-content dl table,.rst-content dl ul{margin-bottom:12px}.rst-content dl dd{margin:0 0 12px 24px;line-height:24px}.rst-content dl dd>ol:last-child,.rst-content dl dd>p:last-child,.rst-content dl dd>table:last-child,.rst-content dl dd>ul:last-child{margin-bottom:0}html.writer-html4 .rst-content dl:not(.docutils),html.writer-html5 .rst-content 
dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple){margin-bottom:24px}html.writer-html4 .rst-content dl:not(.docutils)>dt,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{display:table;margin:6px 0;font-size:90%;line-height:normal;background:#e7f2fa;color:#2980b9;border-top:3px solid #6ab0de;padding:6px;position:relative}html.writer-html4 .rst-content dl:not(.docutils)>dt:before,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:before{color:#6ab0de}html.writer-html4 .rst-content dl:not(.docutils)>dt .headerlink,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{margin-bottom:6px;border:none;border-left:3px solid #ccc;background:#f0f0f0;color:#555}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils)>dt:first-child,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:first-child{margin-top:0}html.writer-html4 .rst-content dl:not(.docutils) code.descclassname,html.writer-html4 .rst-content dl:not(.docutils) code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descclassname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{background-color:transparent;border:none;padding:0;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .optional,html.writer-html5 .rst-content 
dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .optional{display:inline-block;padding:0 4px;color:#000;font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .property,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .property{display:inline-block;padding-right:8px;max-width:100%}html.writer-html4 .rst-content dl:not(.docutils) .k,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .k{font-style:italic}html.writer-html4 .rst-content dl:not(.docutils) .descclassname,html.writer-html4 .rst-content dl:not(.docutils) .descname,html.writer-html4 .rst-content dl:not(.docutils) .sig-name,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .sig-name{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#000}.rst-content .viewcode-back,.rst-content .viewcode-link{display:inline-block;color:#27ae60;font-size:80%;padding-left:24px}.rst-content .viewcode-back{display:block;float:right}.rst-content p.rubric{margin-bottom:12px;font-weight:700}.rst-content code.download,.rst-content tt.download{background:inherit;padding:inherit;font-weight:400;font-family:inherit;font-size:inherit;color:inherit;border:inherit;white-space:inherit}.rst-content code.download span:first-child,.rst-content tt.download span:first-child{-webkit-font-smoothing:subpixel-antialiased}.rst-content code.download span:first-child:before,.rst-content tt.download span:first-child:before{margin-right:4px}.rst-content .guilabel,.rst-content .menuselection{font-size:80%;font-weight:700;border-radius:4px;padding:2.4px 6px;margin:auto 2px}.rst-content .guilabel,.rst-content .menuselection{border:1px solid #7fbbe3;background:#e7f2fa}.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>.kbd,.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>kbd{color:inherit;font-size:80%;background-color:#fff;border:1px solid #a6a6a6;border-radius:4px;box-shadow:0 2px grey;padding:2.4px 6px;margin:auto 0}.rst-content .versionmodified{font-style:italic}@media screen and (max-width:480px){.rst-content .sidebar{width:100%}}span[id*=MathJax-Span]{color:#404040}.math{text-align:center}@font-face{font-family:Lato;src:url(fonts/lato-normal.woff2?bd03a2cc277bbbc338d464e679fe9942) format("woff2"),url(fonts/lato-normal.woff?27bd77b9162d388cb8d4c4217c7c5e2a) format("woff");font-weight:400;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold.woff2?cccb897485813c7c256901dbca54ecf2) format("woff2"),url(fonts/lato-bold.woff?d878b6c29b10beca227e9eef4246111b) format("woff");font-weight:700;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold-italic.woff2?0b6bb6725576b072c5d0b02ecdd1900d) format("woff2"),url(fonts/lato-bold-italic.woff?9c7e4e9eb485b4a121c760e61bc3707c) format("woff");font-weight:700;font-style:italic;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-normal-italic.woff2?4eb103b4d12be57cb1d040ed5e162e9d) 
format("woff2"),url(fonts/lato-normal-italic.woff?f28f2d6482446544ef1ea1ccc6dd5892) format("woff");font-weight:400;font-style:italic;font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:400;src:url(fonts/Roboto-Slab-Regular.woff2?7abf5b8d04d26a2cafea937019bca958) format("woff2"),url(fonts/Roboto-Slab-Regular.woff?c1be9284088d487c5e3ff0a10a92e58c) format("woff");font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:700;src:url(fonts/Roboto-Slab-Bold.woff2?9984f4a9bda09be08e83f2506954adbe) format("woff2"),url(fonts/Roboto-Slab-Bold.woff?bed5564a116b05148e3b3bea6fb1162a) format("woff");font-display:block} \ No newline at end of file diff --git a/_static/custom.css b/_static/custom.css new file mode 100644 index 0000000..7c01143 --- /dev/null +++ b/_static/custom.css @@ -0,0 +1,303 @@ +/* make the page 1000px */ +.wy-nav-content { + max-width: 1200px; + } + + /* code block highlight color in rtd changed to lime green, no no no */ + + .rst-content tt.literal, .rst-content code.literal, .highlight { + background: #f0f0f0; + } + .rst-content tt.literal, .rst-content code.literal { + color: #000000; + } + + /* Footer privacy links */ + a#wap_dns { + display: none; + } + + a#wap_nac { + display: none; + } + + /* replace the copyright to eliminate the copyright symbol enforced by + the ReadTheDocs theme */ + div[role=contentinfo] { + visibility: hidden; + position: absolute; + } + /* set color of version navigator to yellow */ + div.version a { + color: hsla(0,0%,100%,.3); + } + div.version p { + color: #FFFF00; + } + + .menu { + display: flex; + flex-wrap: wrap; + width: 60%; + margin: auto; + padding: 10px 24px 24px 24px; + } + + .menu-element { + flex: 1 1 48%; + background-color: #F3F4F7; + color: black; + text-align: center; + margin: 2px; + padding: 10px 24px; + cursor: pointer; + } + + @media screen and (max-width: 768px) { + .menu-element { + width: 100%; + } + } + + .menu-element:hover { + background-color: #c41c24; + color: white; + } + + .menu-element.selected { + background-color: #c41c24; + color: white; + } + + + + .row { + display: flex; + flex-wrap: wrap; + width: 100%; + margin: auto; + padding: 0px 2px; + } + + .row-element-1 { + flex: 1 1 20%; + display: block; + margin-top: auto; + margin-bottom: auto; + } + + .row-element-2 { + flex: 1 1 80%; + margin-top: auto; + margin-bottom: auto; + } + + .mobile-headings { + flex: 1 1 auto; + color: black; + text-align: left; + margin: 2px; + padding: 10px 24px; + display: none; + } + + .headings-element { + text-align: left; + margin: 2px; + padding: 10px 24px; + } + + .values-element { + background-color: #F3F4F7; + color: black; + text-align: left; + margin: 2px; + padding: 10px 24px; + cursor: pointer; + } + + .values-element-disabled { + background-color: #F3F4F7; + color: black; + text-align: left; + margin: 2px; + padding: 10px 24px; + cursor: pointer; + } + + .block-1 { + flex: 1 1 100%; + } + + .block-2 { + flex: 1 1 49%; + } + + .block-3 { + flex: 1 1 32%; + } + + .block-4 { + flex: 1 1 24%; + } + + .block-5 { + flex: 1 1 19%; + } + + @media screen and (max-width: 1050px) { + .row-element-1 { + display: none; + } + + .row-element-2 { + flex: 1 1 100%; + } + + .mobile-headings { + display: block; + } + + .block-2,.block-3,.block-4,.block-5 { + flex: 1 1 100%; + } + } + + .values-element:hover { + background-color: #c41c24; + color: white; + } + + .values-element.selected { + background-color: #c41c24; + color: white; + } + + #install-instructions { + padding-top: 50px; + 
} + + .container { + max-width: 100%; + margin-left: auto; + margin-right: auto; + padding-left: 1rem; + padding-right: 1rem; +} + +.bg-white { + background-color: #ffffff; +} + +.flex { + display: flex; +} + +.flex-col { + flex-direction: column; +} + +.items-center { + align-items: center; +} + +.grid { + display: grid; +} + +.grid-cols-1 { + grid-template-columns: repeat(1, minmax(0, 1fr)); +} + +@media (min-width: 768px) { + .md\:grid-cols-3 { + grid-template-columns: repeat(3, minmax(0, 1fr)); + } +} + +.gap-8 { + gap: 2rem; +} + +.rounded-lg { + border-radius: 0.5rem; +} + +.p-4 { + padding: 1rem; +} + +.mb-4 { + margin-bottom: 1rem; +} + +.w-16 { + width: 4rem; +} + +.h-16 { + height: 4rem; +} + +.rounded-md { + border-radius: 0.375rem; +} + +.mr-4 { + margin-right: 1rem; +} + +.text-lg { + font-size: 1.125rem; + line-height: 1.75rem; +} + +.font-semibold { + font-weight: 600; +} + +.text-gray-600 { + color: #4b5563; +} + +.space-x-4 > :not([hidden]) ~ :not([hidden]) { + --tw-space-x-reverse: 0; + margin-right: calc(1rem * var(--tw-space-x-reverse)); + margin-left: calc(1rem * calc(1 - var(--tw-space-x-reverse))); +} + +.text-blue-600 { + color: #2563eb; +} + +.flex-grow { + flex-grow: 1; +} + +.container { + background-color: transparent; + padding: 3%; + padding-top: 5%; + overflow-y: hidden; + height: 100%; +} +.box { + background-color: white; +} +.img { + background-size: contain; + background-repeat: no-repeat; +} +.desc { + font-size: small; +} +.split { + color: black; +} +.gap { + margin-bottom: 5%; +} + +.box .flex-grow { + flex-grow: 1; /* 让文本内容占据剩余空间 */ +} \ No newline at end of file diff --git a/_static/doctools.js b/_static/doctools.js new file mode 100644 index 0000000..0398ebb --- /dev/null +++ b/_static/doctools.js @@ -0,0 +1,149 @@ +/* + * Base JavaScript utilities for all Sphinx HTML documentation. + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. + */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? 
singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); + if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git a/_static/documentation_options.js b/_static/documentation_options.js new file mode 100644 index 0000000..c2ad88a --- /dev/null +++ b/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '1.0', + LANGUAGE: 'zh-CN', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: false, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/_static/file.png b/_static/file.png new file mode 100644 index 0000000..a858a41 Binary files /dev/null and b/_static/file.png differ diff --git a/_static/fonts/Lato/lato-bold.eot b/_static/fonts/Lato/lato-bold.eot new file mode 100644 index 0000000..3361183 Binary files /dev/null and b/_static/fonts/Lato/lato-bold.eot differ diff --git a/_static/fonts/Lato/lato-bold.ttf 
b/_static/fonts/Lato/lato-bold.ttf new file mode 100644 index 0000000..29f691d Binary files /dev/null and b/_static/fonts/Lato/lato-bold.ttf differ diff --git a/_static/fonts/Lato/lato-bold.woff b/_static/fonts/Lato/lato-bold.woff new file mode 100644 index 0000000..c6dff51 Binary files /dev/null and b/_static/fonts/Lato/lato-bold.woff differ diff --git a/_static/fonts/Lato/lato-bold.woff2 b/_static/fonts/Lato/lato-bold.woff2 new file mode 100644 index 0000000..bb19504 Binary files /dev/null and b/_static/fonts/Lato/lato-bold.woff2 differ diff --git a/_static/fonts/Lato/lato-bolditalic.eot b/_static/fonts/Lato/lato-bolditalic.eot new file mode 100644 index 0000000..3d41549 Binary files /dev/null and b/_static/fonts/Lato/lato-bolditalic.eot differ diff --git a/_static/fonts/Lato/lato-bolditalic.ttf b/_static/fonts/Lato/lato-bolditalic.ttf new file mode 100644 index 0000000..f402040 Binary files /dev/null and b/_static/fonts/Lato/lato-bolditalic.ttf differ diff --git a/_static/fonts/Lato/lato-bolditalic.woff b/_static/fonts/Lato/lato-bolditalic.woff new file mode 100644 index 0000000..88ad05b Binary files /dev/null and b/_static/fonts/Lato/lato-bolditalic.woff differ diff --git a/_static/fonts/Lato/lato-bolditalic.woff2 b/_static/fonts/Lato/lato-bolditalic.woff2 new file mode 100644 index 0000000..c4e3d80 Binary files /dev/null and b/_static/fonts/Lato/lato-bolditalic.woff2 differ diff --git a/_static/fonts/Lato/lato-italic.eot b/_static/fonts/Lato/lato-italic.eot new file mode 100644 index 0000000..3f82642 Binary files /dev/null and b/_static/fonts/Lato/lato-italic.eot differ diff --git a/_static/fonts/Lato/lato-italic.ttf b/_static/fonts/Lato/lato-italic.ttf new file mode 100644 index 0000000..b4bfc9b Binary files /dev/null and b/_static/fonts/Lato/lato-italic.ttf differ diff --git a/_static/fonts/Lato/lato-italic.woff b/_static/fonts/Lato/lato-italic.woff new file mode 100644 index 0000000..76114bc Binary files /dev/null and b/_static/fonts/Lato/lato-italic.woff differ diff --git a/_static/fonts/Lato/lato-italic.woff2 b/_static/fonts/Lato/lato-italic.woff2 new file mode 100644 index 0000000..3404f37 Binary files /dev/null and b/_static/fonts/Lato/lato-italic.woff2 differ diff --git a/_static/fonts/Lato/lato-regular.eot b/_static/fonts/Lato/lato-regular.eot new file mode 100644 index 0000000..11e3f2a Binary files /dev/null and b/_static/fonts/Lato/lato-regular.eot differ diff --git a/_static/fonts/Lato/lato-regular.ttf b/_static/fonts/Lato/lato-regular.ttf new file mode 100644 index 0000000..74decd9 Binary files /dev/null and b/_static/fonts/Lato/lato-regular.ttf differ diff --git a/_static/fonts/Lato/lato-regular.woff b/_static/fonts/Lato/lato-regular.woff new file mode 100644 index 0000000..ae1307f Binary files /dev/null and b/_static/fonts/Lato/lato-regular.woff differ diff --git a/_static/fonts/Lato/lato-regular.woff2 b/_static/fonts/Lato/lato-regular.woff2 new file mode 100644 index 0000000..3bf9843 Binary files /dev/null and b/_static/fonts/Lato/lato-regular.woff2 differ diff --git a/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot b/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot new file mode 100644 index 0000000..79dc8ef Binary files /dev/null and b/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot differ diff --git a/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf b/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf new file mode 100644 index 0000000..df5d1df Binary files /dev/null and b/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf differ diff --git 
a/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff b/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff new file mode 100644 index 0000000..6cb6000 Binary files /dev/null and b/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff differ diff --git a/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 b/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 new file mode 100644 index 0000000..7059e23 Binary files /dev/null and b/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 differ diff --git a/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot b/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot new file mode 100644 index 0000000..2f7ca78 Binary files /dev/null and b/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot differ diff --git a/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf b/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf new file mode 100644 index 0000000..eb52a79 Binary files /dev/null and b/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf differ diff --git a/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff b/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff new file mode 100644 index 0000000..f815f63 Binary files /dev/null and b/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff differ diff --git a/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 b/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 new file mode 100644 index 0000000..f2c76e5 Binary files /dev/null and b/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 differ diff --git a/_static/images/deepspeed.png b/_static/images/deepspeed.png new file mode 100644 index 0000000..79d7d70 Binary files /dev/null and b/_static/images/deepspeed.png differ diff --git a/_static/images/diffusers.png b/_static/images/diffusers.png new file mode 100644 index 0000000..9c4c45a Binary files /dev/null and b/_static/images/diffusers.png differ diff --git a/_static/images/huggingface.png b/_static/images/huggingface.png new file mode 100644 index 0000000..1937e85 Binary files /dev/null and b/_static/images/huggingface.png differ diff --git a/_static/images/llama-factory.png b/_static/images/llama-factory.png new file mode 100644 index 0000000..cef7c49 Binary files /dev/null and b/_static/images/llama-factory.png differ diff --git a/_static/images/llama_cpp.png b/_static/images/llama_cpp.png new file mode 100644 index 0000000..365c5b8 Binary files /dev/null and b/_static/images/llama_cpp.png differ diff --git a/_static/images/lm-deploy.png b/_static/images/lm-deploy.png new file mode 100644 index 0000000..74aee50 Binary files /dev/null and b/_static/images/lm-deploy.png differ diff --git a/_static/images/lm-evalution.png b/_static/images/lm-evalution.png new file mode 100644 index 0000000..a135604 Binary files /dev/null and b/_static/images/lm-evalution.png differ diff --git a/_static/images/onnxruntime.png b/_static/images/onnxruntime.png new file mode 100644 index 0000000..aa63d44 Binary files /dev/null and b/_static/images/onnxruntime.png differ diff --git a/_static/images/opencompass.png b/_static/images/opencompass.png new file mode 100644 index 0000000..90bd471 Binary files /dev/null and b/_static/images/opencompass.png differ diff --git a/_static/images/opencv.png b/_static/images/opencv.png new file mode 100644 index 0000000..dc2f86a Binary files /dev/null and b/_static/images/opencv.png differ diff --git a/_static/images/pytorch.png b/_static/images/pytorch.png new file mode 100644 index 0000000..87b4a8a Binary files /dev/null and b/_static/images/pytorch.png differ diff --git 
a/_static/images/pytorch_wechat.jpg b/_static/images/pytorch_wechat.jpg new file mode 100644 index 0000000..ba0dae8 Binary files /dev/null and b/_static/images/pytorch_wechat.jpg differ diff --git a/_static/images/sd-webui.png b/_static/images/sd-webui.png new file mode 100644 index 0000000..17d9b87 Binary files /dev/null and b/_static/images/sd-webui.png differ diff --git a/_static/images/sentence_transformers.png b/_static/images/sentence_transformers.png new file mode 100644 index 0000000..68841b6 Binary files /dev/null and b/_static/images/sentence_transformers.png differ diff --git a/_static/images/trl.png b/_static/images/trl.png new file mode 100644 index 0000000..96e5321 Binary files /dev/null and b/_static/images/trl.png differ diff --git a/_static/images/wenet.png b/_static/images/wenet.png new file mode 100644 index 0000000..0ceb2d7 Binary files /dev/null and b/_static/images/wenet.png differ diff --git a/_static/images/whisper_cpp.png b/_static/images/whisper_cpp.png new file mode 100644 index 0000000..550bc13 Binary files /dev/null and b/_static/images/whisper_cpp.png differ diff --git a/_static/jquery.js b/_static/jquery.js new file mode 100644 index 0000000..c4c6022 --- /dev/null +++ b/_static/jquery.js @@ -0,0 +1,2 @@ +/*! jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */ +!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],r=Object.getPrototypeOf,s=t.slice,g=t.flat?function(e){return t.flat.call(e)}:function(e){return t.concat.apply([],e)},u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType&&"function"!=typeof e.item},x=function(e){return null!=e&&e===e.window},E=C.document,c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.6.0",S=function(e,t){return new S.fn.init(e,t)};function p(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return 
t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return 
a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return 
se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
"],col:[2,"","
"],tr:[2,"","
"],td:[3,"","
"],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function je(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function De(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function qe(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=zt.pop()||S.expando+"_"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Ut.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,"$1"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument("").body).innerHTML="
",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return 
this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}if(t.length>0){$(".wy-menu-vertical .current").removeClass("current").attr("aria-expanded","false"),t.addClass("current").attr("aria-expanded","true"),t.closest("li.toctree-l1").parent().addClass("current").attr("aria-expanded","true");for(let n=1;n<=10;n++)t.closest("li.toctree-l"+n).addClass("current").attr("aria-expanded","true");t[0].scrollIntoView()}}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current").attr("aria-expanded","false"),e.siblings().find("li.current").removeClass("current").attr("aria-expanded","false");var t=e.find("> ul li");t.length&&(t.removeClass("current").attr("aria-expanded","false"),e.toggleClass("current").attr("aria-expanded",(function(n,e){return"true"==e?"false":"true"})))}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t a.language.name.localeCompare(b.language.name)); + + const languagesHTML = ` +
+
Languages
+ ${languages + .map( + (translation) => ` +
+ ${translation.language.code} +
+ `, + ) + .join("\n")} +
+ `; + return languagesHTML; + } + + function renderVersions(config) { + if (!config.versions.active.length) { + return ""; + } + const versionsHTML = ` +
+
版本
+ ${config.versions.active + .map( + (version) => ` +
+ ${version.slug} +
+ `, + ) + .join("\n")} +
+ `; + return versionsHTML; + } + + function renderDownloads(config) { + if (!Object.keys(config.versions.current.downloads).length) { + return ""; + } + const downloadsNameDisplay = { + pdf: "PDF", + epub: "Epub", + htmlzip: "HTML", + }; + + const downloadsHTML = ` +
+
下载
+ ${Object.entries(config.versions.current.downloads) + .map( + ([name, url]) => ` +
+ ${downloadsNameDisplay[name]} +
+ `, + ) + .join("\n")} +
+ `; + return downloadsHTML; + } + + document.addEventListener("readthedocs-addons-data-ready", function (event) { + const config = event.detail.data(); + + const flyout = ` +
+ + Read the Docs + v: ${config.versions.current.slug} + + +
+
+ ${renderLanguages(config)} + ${renderVersions(config)} + ${renderDownloads(config)} +
+
托管于 Read the Docs
+
+ 项目主页 +
+
+ 构建 +
+
+ 下载 +
+
+
+
搜索
+
+
+ +
+
+
+
+ + Hosted by Read the Docs + +
+
+ `; + + // Inject the generated flyout into the body HTML element. + document.body.insertAdjacentHTML("beforeend", flyout); + + // Trigger the Read the Docs Addons Search modal when clicking on the "Search docs" input from inside the flyout. + document + .querySelector("#flyout-search-form") + .addEventListener("focusin", () => { + const event = new CustomEvent("readthedocs-search-show"); + document.dispatchEvent(event); + }); + }) +} + +if (themeLanguageSelector || themeVersionSelector) { + function onSelectorSwitch(event) { + const option = event.target.selectedIndex; + const item = event.target.options[option]; + window.location.href = item.dataset.url; + } + + document.addEventListener("readthedocs-addons-data-ready", function (event) { + const config = event.detail.data(); + + const versionSwitch = document.querySelector( + "div.switch-menus > div.version-switch", + ); + if (themeVersionSelector) { + let versions = config.versions.active; + if (config.versions.current.hidden || config.versions.current.type === "external") { + versions.unshift(config.versions.current); + } + const versionSelect = ` + + `; + + versionSwitch.innerHTML = versionSelect; + versionSwitch.firstElementChild.addEventListener("change", onSelectorSwitch); + } + + const languageSwitch = document.querySelector( + "div.switch-menus > div.language-switch", + ); + + if (themeLanguageSelector) { + if (config.projects.translations.length) { + // Add the current language to the options on the selector + let languages = config.projects.translations.concat( + config.projects.current, + ); + languages = languages.sort((a, b) => + a.language.name.localeCompare(b.language.name), + ); + + const languageSelect = ` + + `; + + languageSwitch.innerHTML = languageSelect; + languageSwitch.firstElementChild.addEventListener("change", onSelectorSwitch); + } + else { + languageSwitch.remove(); + } + } + }); +} + +document.addEventListener("readthedocs-addons-data-ready", function (event) { + // Trigger the Read the Docs Addons Search modal when clicking on "Search docs" input from the topnav. + document + .querySelector("[role='search'] input") + .addEventListener("focusin", () => { + const event = new CustomEvent("readthedocs-search-show"); + document.dispatchEvent(event); + }); +}); \ No newline at end of file diff --git a/_static/language_data.js b/_static/language_data.js new file mode 100644 index 0000000..c7fe6c6 --- /dev/null +++ b/_static/language_data.js @@ -0,0 +1,192 @@ +/* + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. 
+ */ + +var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; + + +/* Non-minified version is copied as a separate JS file, if available */ + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" + v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new 
RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/_static/llamacpp_actions.js b/_static/llamacpp_actions.js new file mode 100644 index 0000000..b932027 --- /dev/null +++ b/_static/llamacpp_actions.js @@ -0,0 +1,49 @@ +$(document).ready(function () { + $.reset_selection = function (elem) { + elem.parent().children().each(function () { + $(this).removeClass("selected"); + }); + } + + $.get_options = function () { + var options = {}; + $('#col-values').children().each(function () { + var elem = $(this).find(".selected").each(function () { + var id = $(this).attr("id").split("-"); + var category = id[0]; + var value = id[1]; + options[category] = value; + }); + }); + return options; + } + + $.update_table = function () { + var options = $.get_options(); + } + + $("#col-values").on("click", ".values-element", function () { + id = $(this).attr("id"); + fields = id.split("-"); + + $.reset_selection($(this)); + $(this).addClass("selected"); + $.update_table(); + $.gen_content(); + }); + + + $.gen_content = function () { + var options = $.get_options(); + if (options['install_type'] == "docker") { + $('#install-llamacpp-sourceCode-section').hide(); + $('#install-llamacpp-docker-section').show(); + } else if (options['install_type'] == "sourceCode") { + $('#install-llamacpp-docker-section').hide(); + $('#install-llamacpp-sourceCode-section').show(); + } + } + + $.update_table(); + $.gen_content(); +}); diff --git a/_static/llamafactory_actions.js b/_static/llamafactory_actions.js new file mode 100644 index 0000000..472ba05 --- /dev/null +++ b/_static/llamafactory_actions.js @@ -0,0 +1,49 @@ +$(document).ready(function () { + $.reset_selection = function (elem) { + elem.parent().children().each(function () { + $(this).removeClass("selected"); + }); + } + + $.get_options = function () { + var options = {}; + $('#col-values').children().each(function () { + var elem = $(this).find(".selected").each(function () { + var id = $(this).attr("id").split("-"); + var category = id[0]; + var value = id[1]; + options[category] = value; + }); + }); + return options; + } + + $.update_table = function () { + var options = $.get_options(); + } + + $("#col-values").on("click", ".values-element", function () { + id = $(this).attr("id"); + fields = id.split("-"); + + $.reset_selection($(this)); + $(this).addClass("selected"); + $.update_table(); + $.gen_content(); + }); + + + $.gen_content = function () { + var options = $.get_options(); + if (options['install_type'] == "docker") { + $('#install-llmf-pip-section').hide(); + $('#install-llmf-docker-section').show(); + } else if (options['install_type'] == "pip") { + $('#install-llmf-docker-section').hide(); + $('#install-llmf-pip-section').show(); + } + } + + $.update_table(); + $.gen_content(); +}); diff --git a/_static/minus.png b/_static/minus.png new file mode 100644 index 0000000..d96755f Binary files /dev/null and b/_static/minus.png differ diff --git a/_static/package_info.js b/_static/package_info.js new file mode 100644 index 0000000..55c353c --- /dev/null +++ b/_static/package_info.js @@ -0,0 +1,293 @@ +const package_info = { + '8.0.RC3.beta1': { + x86_64: { + url: 
'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.RC3/Ascend-cann-toolkit_8.0.RC3_linux-x86_64.run', + }, + aarch64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.RC3/Ascend-cann-toolkit_8.0.RC3_linux-aarch64.run', + }, + '910b': { + driver_version: '24.1.rc2', + firmware_version: '7.3.0.1.231', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC2/Ascend-hdk-910b-npu-firmware_7.3.0.1.231.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC3/Ascend-cann-kernels-910b_8.0.RC3_linux-aarch64.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC2/Ascend-hdk-910b-npu-driver_24.1.rc2_linux-x86_64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC2/Ascend-hdk-910b-npu-driver_24.1.rc2_linux-aarch64.run', + } + }, + '310p': { + driver_version: '24.1.rc2', + firmware_version: '7.3.0.1.231', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC2/Ascend-hdk-310p-npu-firmware_7.3.0.1.231.run?response-content-type=application/octet-stream', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC3/Ascend-cann-kernels-310p_8.0.RC3_linux-aarch64.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC2/Ascend-hdk-310p-npu-driver_24.1.rc2_linux-x86-64.run?response-content-type=application/octet-stream', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC2/Ascend-hdk-310p-npu-driver_24.1.rc2_linux-aarch64.run?response-content-type=application/octet-stream', + } + } + }, + '8.0.RC2': { + x86_64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC2/Ascend-cann-toolkit_8.0.RC2_linux-x86_64.run' + }, + aarch64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC2/Ascend-cann-toolkit_8.0.RC2_linux-aarch64.run' + }, + '910b': { + driver_version: '24.1.rc1', + firmware_version: '7.1.0.6.220', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-910b-npu-firmware_7.1.0.6.220.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC2/Ascend-cann-kernels-910b_8.0.RC2_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-910b-npu-driver_24.1.rc1_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-910b-npu-driver_24.1.rc1_linux-aarch64.run', + } + }, + '310p': { + driver_version: '24.1.rc1', + firmware_version: '7.1.0.6.220', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-310p-npu-firmware_7.1.0.6.220.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC2/Ascend-cann-kernels-310p_8.0.RC2_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-310p-npu-driver_24.1.rc1_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-310p-npu-driver_24.1.rc1_linux-aarch64.run', + } + } + }, + '8.0.RC1': { + 
x86_64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC1/Ascend-cann-toolkit_8.0.RC1_linux-x86_64.run' + }, + aarch64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC1/Ascend-cann-toolkit_8.0.RC1_linux-aarch64.run' + }, + '910b': { + driver_version: '24.1.rc1', + firmware_version: '7.1.0.6.220', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-910b-npu-firmware_7.1.0.6.220.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC1/Ascend-cann-kernels-910b_8.0.RC1_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-910b-npu-driver_24.1.rc1_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-910b-npu-driver_24.1.rc1_linux-aarch64.run', + } + }, + '310p': { + driver_version: '24.1.rc1', + firmware_version: '7.1.0.6.220', + firmware_url: ' https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-310p-npu-firmware_7.1.0.6.220.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 8.0.RC1/Ascend-cann-kernels-310p_8.0.RC1_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-310p-npu-driver_24.1.rc1_linux-x86-64.run', + }, + aarch64: { + driver_url: ' https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 24.1.RC1/Ascend-hdk-310p-npu-driver_24.1.rc1_linux-aarch64.run', + } + } + }, + '7.0.1': { + x86_64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.1/Ascend-cann-toolkit_7.0.1_linux-x86_64.run' + }, + aarch64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.1/Ascend-cann-toolkit_7.0.1_linux-aarch64.run' + }, + '910b': { + driver_version: '23.0.3', + firmware_version: '7.1.0.5.220', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.3/Ascend-hdk-910b-npu-firmware_7.1.0.5.220.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.1/Ascend-cann-kernels-910b_7.0.1_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.3/Ascend-hdk-910b-npu-driver_23.0.3_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.3/Ascend-hdk-910b-npu-driver_23.0.3_linux-aarch64.run', + } + }, + '310p': { + driver_version: '23.0.1', + firmware_version: '7.1.0.4.220', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.2.1/Ascend-hdk-310p-npu-firmware_7.1.0.4.220.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.1/Ascend-cann-kernels-310p_7.0.1_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.2.1/Ascend-hdk-310p-npu-driver_23.0.1_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.2.1/Ascend-hdk-310p-npu-driver_23.0.1_linux-aarch64.run', + } + } + }, + '7.0.0': { + x86_64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.0/Ascend-cann-toolkit_7.0.0_linux-x86_64.run' + }, + aarch64: { + url: 
'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.0/Ascend-cann-toolkit_7.0.0_linux-aarch64.run' + }, + '910b': { + driver_version: '23.0.3', + firmware_version: '7.1.0.5.220', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.3/Ascend-hdk-910b-npu-firmware_7.1.0.5.220.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.0/Ascend-cann-kernels-910b_7.0.0_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.3/Ascend-hdk-910b-npu-driver_23.0.3_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.3/Ascend-hdk-910b-npu-driver_23.0.3_linux-aarch64.run', + } + }, + '310p': { + driver_version: '23.0.1', + firmware_version: '7.1.0.4.220', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.2.1/Ascend-hdk-310p-npu-firmware_7.1.0.4.220.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.0/Ascend-cann-kernels-310p_7.0.0_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.2.1/Ascend-hdk-310p-npu-driver_23.0.1_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.2.1/Ascend-hdk-310p-npu-driver_23.0.1_linux-aarch64.run', + } + } + }, + '7.0.RC1': { + x86_64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.RC1/Ascend-cann-toolkit_7.0.RC1_linux-x86_64.run' + }, + aarch64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.RC1/Ascend-cann-toolkit_7.0.RC1_linux-aarch64.run' + }, + '910b': { + driver_version: '23.0.rc3', + firmware_version: '6.4.0.4.220', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC3/Ascend-hdk-910b-npu-firmware_6.4.0.4.220.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.RC1/Ascend-cann-kernels-910b_7.0.RC1_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC3/Ascend-hdk-910b-npu-driver_23.0.rc3_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC3/Ascend-hdk-910b-npu-driver_23.0.rc3_linux-aarch64.run', + } + }, + '310p': { + driver_version: '23.0.rc3', + firmware_version: '7.0.0.5.242', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC3/Ascend-hdk-310p-npu-firmware_7.0.0.5.242.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 7.0.RC1/Ascend-cann-kernels-310p_7.0.RC1_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC3/Ascend-hdk-310p-npu-driver_23.0.rc3_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC3/Ascend-hdk-310p-npu-driver_23.0.rc3_linux-aarch64.run', + } + } + }, + '6.3.RC2': { + x86_64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 6.3.RC2/Ascend-cann-toolkit_6.3.RC2_linux-x86_64.run' + }, + aarch64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 6.3.RC2/Ascend-cann-toolkit_6.3.RC2_linux-aarch64.run' + }, + '310p': { + driver_version: '23.0.rc2', + 
firmware_version: '6.4.12.1.241', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC2/Ascend-hdk-310p-npu-firmware_6.4.12.1.241.run', + kernel_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 6.3.RC2/Ascend-cann-kernels-310p_6.3.RC2_linux.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC2/Ascend-hdk-310p-npu-driver_23.0.rc2_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC2/Ascend-hdk-310p-npu-driver_23.0.rc2_linux-aarch64.run', + } + } + }, + '6.3.RC1': { + x86_64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 6.3.RC1/Ascend-cann-toolkit_6.3.RC1_linux-x86_64.run' + }, + aarch64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN 6.3.RC1/Ascend-cann-toolkit_6.3.RC1_linux-aarch64.run' + }, + '310p': { + driver_version: '23.0.rc1', + firmware_version: '6.3.0.1.241', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC1/Ascend-hdk-310p-npu-firmware_6.3.0.1.241.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC1/Ascend-hdk-310p-npu-driver_23.0.rc1_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend HDK/Ascend HDK 23.0.RC1/Ascend-hdk-310p-npu-driver_23.0.rc1_linux-aarch64.run', + } + } + }, + '6.0.RC1': { + x86_64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN6.0.RC1/Ascend-cann-toolkit_6.0.RC1_linux-x86_64.run' + }, + aarch64: { + url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN6.0.RC1/Ascend-cann-toolkit_6.0.RC1_linux-aarch64.run' + }, + '310p': { + driver_version: '6.0.rc1.2', + firmware_version: '6.0.rc1.2', + firmware_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/AscendHDK/AscendHDK22.0.RC3.2/Ascend-hdk-310p-npu-firmware_6.0.rc1.2.run', + x86_64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/AscendHDK/AscendHDK22.0.RC3.2/Ascend-hdk-310p-npu-driver_6.0.rc1.2_linux-x86-64.run', + }, + aarch64: { + driver_url: 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/AscendHDK/AscendHDK22.0.RC3.2/Ascend-hdk-310p-npu-driver_6.0.rc1.2_linux-aarch64.run', + } + } + } +}; + + +const docker_images = [ + 'ascendai/cann:7.0.1.beta1-910b-ubuntu22.04-py3.8', + 'ascendai/cann:7.0.1.beta1-910b-openeuler22.03-py3.8', + 'ascendai/cann:8.0.rc1.beta1-910b-ubuntu22.04-py3.8', + 'ascendai/cann:8.0.rc1.beta1-910b-openeuler22.03-py3.8', + 'ascendai/cann:8.0.rc2.beta1-910b-ubuntu22.04-py3.9', + 'ascendai/cann:8.0.rc2.beta1-910b-ubuntu22.04-py3.10', + 'ascendai/cann:8.0.rc2.beta1-910b-openeuler22.03-py3.8', + 'ascendai/cann:8.0.rc2.beta1-910b-openeuler22.03-py3.9', + 'ascendai/cann:8.0.rc2.beta1-910b-openeuler22.03-py3.10', + 'ascendai/cann:8.0.rc3.beta1-910b-ubuntu22.04-py3.10', + 'ascendai/cann:8.0.rc3.beta1-910b-openeuler22.03-py3.10', + + 'ascendai/cann:7.0.1-310p-openeuler22.03-py3.8', + 'ascendai/cann:7.0.1-310p-ubuntu22.04-py3.8', + 'ascendai/cann:8.0.rc1-310p-ubuntu22.04-py3.9', + 'ascendai/cann:8.0.rc1-310p-openeuler22.03-py3.9', +]; + +const pytorch_versions = { + '2.3.1': { + torch_npu: '2.3.1', + npu_branch: 'v2.3.1-6.0.rc2', + cann: 'CANN 8.0.RC2', + docker: 'ascendai/pytorch:2.3.1' + }, + '2.2.0': { + torch_npu: '2.2.0', + npu_branch: 'v2.2.0-6.0.rc1', + cann: 'CANN 8.0.RC1', + docker: 
'ascendai/pytorch:2.2.0' + }, + '2.1.0': { + torch_npu: '2.1.0.post3', + npu_branch: 'v2.1.0-6.0.rc1', + cann: 'CANN 8.0.RC1', + docker: 'ascendai/pytorch:2.1.0' + } +} diff --git a/_static/plus.png b/_static/plus.png new file mode 100644 index 0000000..7107cec Binary files /dev/null and b/_static/plus.png differ diff --git a/_static/pygments.css b/_static/pygments.css new file mode 100644 index 0000000..84ab303 --- /dev/null +++ b/_static/pygments.css @@ -0,0 +1,75 @@ +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #008000; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #9C6500 } /* Comment.Preproc */ +.highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +.highlight .gr { color: #E40000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #008400 } /* Generic.Inserted */ +.highlight .go { color: #717171 } /* Generic.Output */ +.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ +.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #008000 } /* Keyword.Pseudo */ +.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #B00040 } /* Keyword.Type */ +.highlight .m { color: #666666 } /* Literal.Number */ +.highlight .s { color: #BA2121 } /* Literal.String */ +.highlight .na { color: #687822 } /* Name.Attribute */ +.highlight .nb { color: #008000 } /* Name.Builtin */ +.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */ +.highlight .no { color: #880000 } /* Name.Constant */ +.highlight .nd { color: #AA22FF } /* Name.Decorator */ +.highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #0000FF } /* Name.Function */ +.highlight .nl { color: #767600 } /* Name.Label */ +.highlight .nn { color: #0000FF; font-weight: bold } 
/* Name.Namespace */ +.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #19177C } /* Name.Variable */ +.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mb { color: #666666 } /* Literal.Number.Bin */ +.highlight .mf { color: #666666 } /* Literal.Number.Float */ +.highlight .mh { color: #666666 } /* Literal.Number.Hex */ +.highlight .mi { color: #666666 } /* Literal.Number.Integer */ +.highlight .mo { color: #666666 } /* Literal.Number.Oct */ +.highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +.highlight .sc { color: #BA2121 } /* Literal.String.Char */ +.highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +.highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +.highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +.highlight .sx { color: #008000 } /* Literal.String.Other */ +.highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +.highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +.highlight .ss { color: #19177C } /* Literal.String.Symbol */ +.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #0000FF } /* Name.Function.Magic */ +.highlight .vc { color: #19177C } /* Name.Variable.Class */ +.highlight .vg { color: #19177C } /* Name.Variable.Global */ +.highlight .vi { color: #19177C } /* Name.Variable.Instance */ +.highlight .vm { color: #19177C } /* Name.Variable.Magic */ +.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/_static/pytorch_actions.js b/_static/pytorch_actions.js new file mode 100644 index 0000000..0f2167c --- /dev/null +++ b/_static/pytorch_actions.js @@ -0,0 +1,96 @@ +$(document).ready(function () { + $.reset_selection = function (elem) { + elem.parent().children().each(function () { + $(this).removeClass("selected"); + }); + } + + $.get_options = function () { + var options = {}; + $('#col-values').children().each(function () { + var elem = $(this).find(".selected").each(function () { + var id = $(this).attr("id").split("-"); + var category = id[0]; + var value = id[1]; + if(category == 'cann' || category == 'pytorch_npu') + options[category] = $(this).text(); + else + options[category] = value; + }); + }); + return options; + } + + $.update_table = function () { + var options = $.get_options(); + var pytorch_version = options['pytorch']; + var match_versions = pytorch_versions[pytorch_version]; + $("#pytorch_npu-version").text(match_versions['torch_npu']); + $("#cann-version").text(match_versions['cann']); + } + + $("#col-values").on("click", ".values-element", function () { + id = $(this).attr("id"); + fields = id.split("-"); + if (fields[0] == "pytorch_npu" || fields[0] == "cann") + return; + + $.reset_selection($(this)); + $(this).addClass("selected"); + $.update_table(); + $.gen_content(); + }); + + + $.gen_content = function () { + var options = $.get_options(); + var pytorch_version = options['pytorch']; + var match_versions = pytorch_versions[pytorch_version]; + if (options['install_type'] == "docker") { + var dockerCommand = ` +docker run \\ + --name cann_container \\ + --device 
/dev/davinci1 \\ + --device /dev/davinci_manager \\ + --device /dev/devmm_svm \\ + --device /dev/hisi_hdc \\ + -v /usr/local/dcmi:/usr/local/dcmi \\ + -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \\ + -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \\ + -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \\ + -v /etc/ascend_install.info:/etc/ascend_install.info \\ + -e DRIVER_PATH=/usr/local/Ascend/driver \\ + -it ${match_versions['docker']} bash + `; + + $('#codecell0').html(dockerCommand); + $('#install-pytorch-source-section').hide(); + $('#install-pytorch-pip-section').hide(); + $('#install-pytorch-docker-section').show(); + } else if (options['install_type'] == "pip") { + $('#codecell1').html("# install torch\n"); + if(options['arch'] == "aarch64") + $('#codecell1').append("pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple torch==" + options['pytorch']); + else + $('#codecell1').append("pip3 install torch=="+options['pytorch']+"+cpu --index-url https://download.pytorch.org/whl/cpu"); + + $("#codecell1").append("\n\n# install torch-npu\npip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple torch-npu==" + options['pytorch_npu']); + + $('#install-pytorch-source-section').hide(); + $('#install-pytorch-docker-section').hide(); + $('#install-pytorch-pip-section').show(); + } else { + $("#codecell3").html("# install requirements\nconda install cmake ninja git\n\n# get torch source\ngit clone -b v"+options['pytorch']+" --recursive https://github.com/pytorch/pytorch\ncd pytorch\ngit submodule sync\ngit submodule update --init --recursive\n\n# install torch\npip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt\nexport CMAKE_PREFIX_PATH=${CONDA_PREFIX:-\"$(dirname $(which conda))/../\"}\nUSE_CUDA=0 python setup.py develop"); + + $('#codecell3').append("\n\n# get torch-npu source\ngit clone https://github.com/ascend/pytorch.git -b "+match_versions['npu_branch']+" --depth 1 pytorch_npu\ncd pytorch_npu\n\n# install torch-npu\npip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt\nbash ci/build.sh --python=$(python --version 2>&1 | awk '{print $2}' | cut -d '.' -f 1,2)\npip install dist/torch_npu*.whl"); + + $('#install-pytorch-pip-section').hide(); + $('#install-pytorch-docker-section').hide(); + $('#install-pytorch-source-section').show(); + } + + } + + $.update_table(); + $.gen_content(); +}); diff --git a/_static/searchtools.js b/_static/searchtools.js new file mode 100644 index 0000000..2c774d1 --- /dev/null +++ b/_static/searchtools.js @@ -0,0 +1,632 @@ +/* + * Sphinx JavaScript utilities for the full-text search. + */ +"use strict"; + +/** + * Simple result scoring code. + */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. + /* + score: result => { + const [docname, title, anchor, descr, score, filename, kind] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. 
+ objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +// Global search result kind enum, used by themes to style search results. +class SearchResultKind { + static get index() { return "index"; } + static get object() { return "object"; } + static get text() { return "text"; } + static get title() { return "title"; } +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename, kind] = item; + + let listItem = document.createElement("li"); + // Add a class representing the item's type: + // can be used by a theme's CSS selector for styling + // See SearchResultKind for the class names. + listItem.classList.add(`kind-${kind}`); + let requestUrl; + let linkUrl; + if (docBuilder === "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + Search.makeSearchSummary(data, searchTerms, anchor) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." 
+ ); + else + Search.status.innerText = Documentation.ngettext( + "Search finished, found one page matching the search query.", + "Search finished, found ${resultCount} pages matching the search query.", + resultCount, + ).replace('${resultCount}', resultCount); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; +// Helper function used by query() to order search results. +// Each input is an array of [docname, title, anchor, descr, score, filename, kind]. +// Order the results by score (in opposite order of appearance, since the +// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. +const _orderResultsByScoreThenName = (a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 1 : -1; +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. + * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString, anchor) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + for (const removalQuery of [".headerlink", "script", "style"]) { + htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); + } + if (anchor) { + const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`); + if (anchorContent) return anchorContent.textContent; + + console.warn( + `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.` + ); + } + + // if anchor not specified or not found, fall back to main content + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent) return docContent.textContent; + + console.warn( + "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template." 
+ ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.setAttribute("role", "list"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + Search.startPulse(); + + // index already loaded, the browser was quick! 
+ if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + _parseQuery: (query) => { + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + return [query, searchTerms, excludedTerms, highlightTerms, objectTerms]; + }, + + /** + * execute search (requires search index to be loaded) + */ + _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // Collect multiple result groups to be sorted separately and then ordered. + // Each is an array of [docname, title, anchor, descr, score, filename, kind]. + const normalResults = []; + const nonMainIndexResults = []; + + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase().trim(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + const score = Math.round(Scorer.title * queryLower.length / title.length); + const boost = titles[file] === title ? 1 : 0; // add a boost for document titles + normalResults.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score + boost, + filenames[file], + SearchResultKind.title, + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id, isMain] of foundEntries) { + const score = Math.round(100 * queryLower.length / entry.length); + const result = [ + docNames[file], + titles[file], + id ? 
"#" + id : "", + null, + score, + filenames[file], + SearchResultKind.index, + ]; + if (isMain) { + normalResults.push(result); + } else { + nonMainIndexResults.push(result); + } + } + } + } + + // lookup as object + objectTerms.forEach((term) => + normalResults.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) { + normalResults.forEach((item) => (item[4] = Scorer.score(item))); + nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item))); + } + + // Sort each group of results by score and then alphabetically by name. + normalResults.sort(_orderResultsByScoreThenName); + nonMainIndexResults.sort(_orderResultsByScoreThenName); + + // Combine the result groups in (reverse) order. + // Non-main index entries are typically arbitrary cross-references, + // so display them after other results. + let results = [...nonMainIndexResults, ...normalResults]; + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + return results.reverse(); + }, + + query: (query) => { + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. 
last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + SearchResultKind.object, + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add support for partial matches + if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + if (!terms.hasOwnProperty(word)) { + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + } + if (!titleTerms.hasOwnProperty(word)) { + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (!fileMap.has(file)) fileMap.set(file, [word]); + else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + + // as search terms with length < 3 are discarded + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== 
searchTerms.size && + wordList.length !== filteredTermCount + ) + continue; + + // ensure that none of the excluded terms is in the search result + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + break; + + // select one (max) score for the file. + const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + SearchResultKind.text, + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords, anchor) => { + const text = Search.htmlToText(htmlText, anchor); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/_static/sphinx_highlight.js b/_static/sphinx_highlight.js new file mode 100644 index 0000000..8a96c69 --- /dev/null +++ b/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. + */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. 
+ */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. + */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. 
+ */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/_static/statistics.js b/_static/statistics.js new file mode 100644 index 0000000..a9916e4 --- /dev/null +++ b/_static/statistics.js @@ -0,0 +1,7 @@ +var _hmt = _hmt || []; +(function() { + var hm = document.createElement("script"); + hm.src = "https://hm.baidu.com/hm.js?102ee21b68fd0e419aa031b4b9645cb5"; + var s = document.getElementsByTagName("script")[0]; + s.parentNode.insertBefore(hm, s); +})(); diff --git a/_static/translations.js b/_static/translations.js new file mode 100644 index 0000000..59f9b0f --- /dev/null +++ b/_static/translations.js @@ -0,0 +1,60 @@ +Documentation.addTranslations({ + "locale": "zh_Hans_CN", + "messages": { + "%(filename)s — %(docstitle)s": "%(filename)s — %(docstitle)s", + "© %(copyright_prefix)s %(copyright)s.": "© %(copyright_prefix)s %(copyright)s.", + ", in ": "\uff0c\u5728 ", + "About these documents": "\u5173\u4e8e\u6b64\u6587\u6863", + "Automatically generated list of changes in version %(version)s": "\u81ea\u52a8\u751f\u6210\u7684 %(version)s \u7248\u672c\u53d8\u66f4\u5217\u8868", + "C API changes": "C API \u7684\u53d8\u66f4", + "Changes in Version %(version)s — %(docstitle)s": "\u4e8e\u7248\u672c %(version)s— %(docstitle)s \u53d8\u66f4", + "Collapse sidebar": "\u6298\u53e0\u8fb9\u680f", + "Complete Table of Contents": "\u5b8c\u6574\u76ee\u5f55", + "Contents": "\u76ee\u5f55", + "Copyright": "\u7248\u6743\u6240\u6709", + "Created using Sphinx %(sphinx_version)s.": "\u7531 Sphinx %(sphinx_version)s\u521b\u5efa\u3002", + "Expand sidebar": "\u5c55\u5f00\u8fb9\u680f", + "Full index on one page": "\u5355\u9875\u5168\u7d22\u5f15", + "General Index": "\u603b\u7d22\u5f15", + "Global Module Index": "\u5168\u5c40\u6a21\u5757\u7d22\u5f15", + "Go": "\u63d0\u4ea4", + "Hide Search Matches": "\u9690\u85cf\u641c\u7d22\u7ed3\u679c", + "Index": "\u7d22\u5f15", + "Index – %(key)s": "\u7d22\u5f15 – %(key)s", + "Index pages by letter": "\u5b57\u6bcd\u7d22\u5f15", + "Indices and tables:": "\u7d22\u5f15\u548c\u8868\u683c\uff1a", + "Last updated on %(last_updated)s.": "\u6700\u540e\u66f4\u65b0\u4e8e %(last_updated)s.", + "Library changes": "\u5e93\u7684\u53d8\u66f4", + "Navigation": "\u5bfc\u822a", + "Next topic": "\u4e0b\u4e00\u4e3b\u9898", + "Other changes": "\u5176\u4ed6\u53d8\u66f4", + "Overview": "\u6982\u8ff0", + "Please activate JavaScript to enable the search\n functionality.": "\u8bf7\u6fc0\u6d3b JavaScript \u4ee5\u5f00\u542f\u641c\u7d22\u529f\u80fd\u3002", + "Preparing search...": "\u6b63\u5728\u51c6\u5907\u641c\u7d22\u2026\u2026", + "Previous topic": "\u4e0a\u4e00\u4e3b\u9898", + "Quick search": "\u5feb\u901f\u641c\u7d22", + "Search": "\u641c\u7d22", + "Search Page": "\u641c\u7d22\u9875\u9762", + "Search Results": "\u641c\u7d22\u7ed3\u679c", + "Search finished, found ${resultCount} page(s) matching the search query.": "\u641c\u7d22\u5b8c\u6210\uff0c\u5339\u914d\u5230 ${resultCount} \u9875\u3002", + "Search within %(docstitle)s": "\u5728 %(docstitle)s \u4e2d\u641c\u7d22", + "Searching": "\u6b63\u5728\u641c\u7d22\u4e2d", + "Searching for multiple words only shows matches that contain\n all words.": "\u5f53\u641c\u7d22\u591a\u4e2a\u5173\u952e\u8bcd\u65f6\uff0c\u53ea\u4f1a\u663e\u793a\u540c\u65f6\u5305\u542b\u6240\u6709\u5173\u952e\u8bcd\u7684\u5185\u5bb9\u3002", + "Show Source": "\u663e\u793a\u6e90\u4ee3\u7801", + "Table of Contents": "\u76ee\u5f55", + "This Page": "\u672c\u9875", + "Welcome! 
This is": "\u6b22\u8fce\uff01", + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories.": "\u60a8\u7684\u641c\u7d22\u6ca1\u6709\u5339\u914d\u5230\u6587\u6863\u3002\u8bf7\u786e\u4fdd\u5173\u952e\u8bcd\u62fc\u5199\u6b63\u786e\uff0c\u5e76\u4e14\u9009\u62e9\u4e86\u5408\u9002\u7684\u5206\u7c7b\u3002", + "all functions, classes, terms": "\u6240\u6709\u51fd\u6570\u3001\u7c7b\u3001\u672f\u8bed\u8bcd\u6c47", + "can be huge": "\u53ef\u80fd\u4f1a\u5927", + "last updated": "\u6700\u540e\u66f4\u65b0\u4e8e", + "lists all sections and subsections": "\u5217\u51fa\u6240\u6709\u7684\u7ae0\u8282\u548c\u90e8\u5206", + "next chapter": "\u4e0b\u4e00\u7ae0", + "previous chapter": "\u4e0a\u4e00\u7ae0", + "quick access to all modules": "\u5feb\u901f\u67e5\u770b\u6240\u6709\u7684\u6a21\u5757", + "search": "\u641c\u7d22", + "search this documentation": "\u641c\u7d22\u6587\u6863", + "the documentation for": "\u672c\u6587\u6863\u5c5e\u4e8e" + }, + "plural_expr": "0" +}); \ No newline at end of file diff --git a/genindex.html b/genindex.html new file mode 100644 index 0000000..cd93491 --- /dev/null +++ b/genindex.html @@ -0,0 +1,239 @@ + + + + + + + + 索引 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + +
+
+ + + + \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 0000000..0237c6b --- /dev/null +++ b/index.html @@ -0,0 +1,458 @@ + + + + + + + + + 昇腾开源 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Ascend Open Source

+
+
+
+
+
+

Warning

+

This documentation is still under development. It may contain errors and may change at any time; do not use it in production environments.

+
+

Select your preferences and follow the installation instructions in the Ascend environment quick-install guide.

+

After installation succeeds, refer to each project's quick start and examples to begin using Ascend AI processors (a minimal environment check is sketched below).

+
+
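As a hedged illustration only (not taken from this documentation), the following minimal Python sketch verifies that PyTorch can see an Ascend NPU once the environment is set up. It assumes the torch and torch_npu packages have been installed as described in the quick-install guide; API details may vary by version.

# Minimal sketch, assuming torch and torch_npu are installed per the quick-install guide.
import torch
import torch_npu  # registers the "npu" device type with PyTorch

if torch.npu.is_available():
    print("Visible NPU devices:", torch.npu.device_count())
    x = torch.ones(2, 2, device="npu:0")   # allocate a tensor on the first NPU
    print((x + x).cpu())                    # run a trivial op and bring the result back
else:
    print("No Ascend NPU visible; check the driver, firmware, and CANN installation.")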
+ +
+
+
+
+

LLaMA-Factory

+

An easy-to-use, efficient fine-tuning tool for large models. Ascend supported since v0.7.1.

+
+
+
+ +
+ +
+
+
+
+

PyTorch

+

The PyTorch AI framework; Ascend officially supported since version 2.1.

+
+
+
+ +
+ +
+
+
+
+

ONNX Runtime

+

Cross-platform, high-performance ML inference and training accelerator. Native Ascend support since v1.13.1.

+
+
+
+ +
+ +
+
+
+
+

DeepSpeed

+

A deep learning optimization library that makes distributed training and inference simple, efficient, and effective. Ascend supported since v0.10.1.

+
+
+
+ +
+ +
+
+
+
+

OpenCV

+

Open-source computer vision library.

+
+
+
+ +
+ +
+
+
+
+

Stable Diffusion web UI

+

Visual toolchain for Stable Diffusion.

+
+
+
+ +
+ +
+
+
+
+

Transformers

+

State-of-the-art machine learning library for PyTorch, TensorFlow, and JAX. Ascend supported since v4.32.0.

+
+
+
+ +
+ +
+
+
+
+

Diffusers

+

Diffusion model toolchain for image and audio generation.

+
+
+
+ +
+ +
+
+
+
+

Accelerate

+

Multi-GPU training toolchain for PyTorch.

+
+
+
+ +
+ +
+
+
+
+

WeNet

+

End-to-end speech recognition toolkit.

+
+
+
+ +
+ +
+
+
+
+

LM-Evaluation-Harness

+

Language model evaluation tool.

+
+
+
+ +
+ +
+
+
+
+

Whisper.cpp

+

High-performance speech recognition framework for Whisper model inference.

+
+
+
+ +
+ +
+
+
+
+

llama.cpp

+

C/C++ implementation of Meta's LLaMA architecture.

+
+
+
+ +
+ +
+
+
+
+

Sentence Transformers

+

High-performance embedding library for text and images.

+
+
+
+ +
+ +
+
+
+
+

Transformer Reinforcement Learning

+

Post-training library for models, supporting methods such as SFT, PPO, and DPO.

+
+
+
+ +
+ +
+
+
+
+

OpenCompass

+

Standard benchmarking tool for large models.

+
+
+
+ +
+ +
+
+
+
+

LMDeploy

+

A toolkit for compressing, deploying, and serving LLMs.

+
+
+
+ +
+ +
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/objects.inv b/objects.inv new file mode 100644 index 0000000..4bbb9b1 Binary files /dev/null and b/objects.inv differ diff --git a/search.html b/search.html new file mode 100644 index 0000000..a78687b --- /dev/null +++ b/search.html @@ -0,0 +1,149 @@ + + + + + + + + 搜索 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ + + + +
+ +
+ +
+
+ +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/searchindex.js b/searchindex.js new file mode 100644 index 0000000..b6af85f --- /dev/null +++ b/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles": {"1. \u4f7f\u7528DeepSpeed\u591a\u5361\u5e76\u884c\u8bad\u7ec3": [[10, "id2"]], "1. \u4f7f\u7528\u811a\u672c\u4e0b\u8f7d": [[67, "id2"]], "1. \u5355\u5361\u8bad\u7ec3": [[43, "id2"]], "1. \u5b89\u88c5DeepSpeed": [[9, "deepspeed"]], "1. \u6570\u636e\u5e76\u884c": [[39, "id2"]], "1. \u7cfb\u7edf\u8981\u6c42": [[7, "id2"]], "1. \u9009\u62e9\u9700\u8981\u5b89\u88c5\u7684 PyTorch \u7248\u672c": [[42, "pytorch"]], "1.1 DDP": [[39, "ddp"]], "1.1 \u524d\u7f6e\u68c0\u67e5": [[7, "id3"]], "1.2 FSDP": [[39, "fsdp"]], "1.2 \u8f6f\u4ef6\u8981\u6c42": [[7, "id4"]], "2. \u4f7f\u7528DeepSpeed\u591a\u5361\u5e76\u884c\u8bad\u7ec3": [[43, "deepspeed"]], "2. \u5b89\u88c5 PyTorch": [[42, "id2"]], "2. \u624b\u52a8\u4e0b\u8f7d": [[67, "id3"]], "2. \u73af\u5883\u5b89\u88c5": [[7, "id5"]], "2. \u8bad\u7ec3\u7ed3\u679c\u67e5\u770b": [[10, "id3"]], "2. \u901a\u8fc7\u6e90\u7801\u5b89\u88c5": [[9, "id3"]], "3. \u4f7f\u7528Transforms\u8fdb\u884c\u6a21\u578b\u5fae\u8c03": [[43, "transforms"]], "3. \u5378\u8f7d": [[7, "id6"]], "3. \u81ea\u884c\u8f6c\u6362\u6a21\u578b": [[67, "id4"]], "3. \u9884\u7f16\u8bd1DeepSpeed\u7b97\u5b50\uff08\u53ef\u9009\uff09": [[9, "id4"]], "3. \u9a8c\u8bc1\u5b89\u88c5\u7ed3\u679c": [[42, "id4"]], "4. \u4f7f\u7528Diffusers\u8fdb\u884c\u6a21\u578b\u5fae\u8c03": [[43, "diffusers"]], "4. \u5b89\u88c5\u9a8c\u8bc1": [[9, "id5"]], "API Server\u7684\u542f\u52a8\u4e0e\u8c03\u7528": [[14, "api-server"]], "API\u8bf4\u660e": [[38, null]], "Accelerate": [[4, null]], "Accelerate \u4e0b\u8f7d\u5b89\u88c5": [[5, "accelerate"]], "AutoModelForCausalLM": [[58, "automodelforcausallm"]], "DeepSpeed": [[8, null]], "Diffusers": [[1, null]], "Diffusers \u5b89\u88c5": [[2, "diffusers"]], "Drivers\uff0cFirmware \u548c CANN": [[22, "drivers-firmware-cann"]], "FAQ": [[15, null], [40, null]], "HuggingFace": [[57, "huggingface"]], "LLAMA-Factory \u4e0b\u8f7d\u5b89\u88c5": [[17, "llama-factory"]], "LLM \u63a8\u7406": [[22, "llm"]], "LLM \u6a21\u578b\u670d\u52a1": [[22, "id12"]], "LLaMA-Factory": [[16, null]], "LLaMA-Factory \u5378\u8f7d": [[17, "id3"]], "LM-Evalution-Harness": [[23, null]], "LMDeploy": [[20, null]], "LoRA \u6a21\u578b\u5408\u5e76\u5bfc\u51fa": [[14, "id11"]], "Meta\u5b98\u65b9": [[57, "meta"]], "ONNX Runtime": [[26, null]], "ONNX Runtime \u5b89\u88c5": [[27, "onnx-runtime"]], "OpenCV": [[35, null]], "OpenCV \u5b89\u88c5": [[36, "opencv"]], "OpenCompass": [[32, null]], "OpenCompass \u5b89\u88c5": [[33, "opencompass"]], "PyTorch": [[41, null]], "Python \u73af\u5883\u521b\u5efa": [[2, "python"], [30, "python"], [33, "python"], [36, "python"], [51, "python"], [63, "python"]], "Sentence Transformers": [[47, null]], "Stable-Diffusion-WebUI": [[44, null]], "TRL \u4e0b\u8f7d\u5b89\u88c5": [[60, "trl"]], "Trainer": [[53, "trainer"]], "Transformer Reinforcement Learning": [[59, null]], "Transformers": [[54, null]], "VLM \u63a8\u7406": [[22, "vlm"]], "VLM \u6a21\u578b\u670d\u52a1": [[22, "id13"]], "WeNet": [[62, null]], "WeNet \u5b89\u88c5": [[63, "wenet"]], "Whisper \u6a21\u578b\u4e0b\u8f7d": [[67, "whisper"]], "Whisper.cpp": [[65, null]], "Whisper.cpp \u7f16\u8bd1\u5b89\u88c5": [[66, "whisper-cpp"]], "git lfs": [[57, "git-lfs"]], "hf-mirror": [[57, "hf-mirror"]], "llama.cpp": [[11, null]], "llama.cpp \u4e0b\u8f7d\u5b89\u88c5": [[12, "llama-cpp"]], "lm-evaluation-harness\u5378\u8f7d": [[24, "id4"]], 
"lm-evaluation-harness\u5b89\u88c5": [[24, "lm-evaluation-harness"]], "lm_deploy \u4e0b\u8f7d\u5b89\u88c5": [[21, "lm-deploy"]], "open_clip": [[29, null]], "open_clip \u5b89\u88c5": [[30, "open-clip"]], "opencompass run.py \u53c2\u6570\u8bf4\u660e": [[34, "id6"]], "pip \u5b89\u88c5": [[2, "pip"]], "pipeline": [[58, "pipeline"]], "pipeline \u4f7f\u7528": [[55, "id2"]], "pipeline \u62bd\u8c61\u7c7b": [[55, "pipeline"]], "sentence-transformers \u4e0b\u8f7d\u5b89\u88c5": [[48, "sentence-transformers"]], "timm": [[50, null]], "timm \u5b89\u88c5": [[51, "timm"]], "torch-npu \u5b89\u88c5": [[30, "torch-npu"], [33, "torch-npu"], [51, "torch-npu"]], "yaml \u914d\u7f6e\u6587\u4ef6": [[19, "yaml"]], "\u4e00\u7ad9\u5f0f webui board \u7684\u4f7f\u7528": [[14, "webui-board"]], "\u4e0b\u8f7d\u6570\u636e": [[64, "id3"]], "\u4e3b\u8981\u53c2\u6570": [[46, "id3"]], "\u4ecepip\u5b89\u88c5": [[27, "pip"]], "\u4ece\u6e90\u7801\u5b89\u88c5": [[21, "id2"], [27, "id2"]], "\u4f7f\u7528 C++": [[37, "c"]], "\u4f7f\u7528 NPU \u7684\u63a8\u7406": [[31, "id4"]], "\u4f7f\u7528 NPU \u7684\u8bad\u7ec3": [[31, "npu"]], "\u4f7f\u7528 Python": [[37, "python"]], "\u4f7f\u7528 pip \u5b89\u88c5\uff08\u63a8\u8350\uff09": [[21, "pip"]], "\u4f7f\u7528conda\u521b\u5efa\u73af\u5883": [[45, "conda"]], "\u4f7f\u7528\u5355\u5361\u63a8\u7406": [[13, "id4"]], "\u4f7f\u7528\u547d\u4ee4\u884c\u4e0eLLM\u6a21\u578b\u5bf9\u8bdd": [[22, "id14"]], "\u4f7f\u7528\u591a\u5361\u63a8\u7406": [[13, "id5"]], "\u4f7f\u7528\u6a21\u578b": [[49, "id3"]], "\u4fee\u6539\u955c\u50cf\u6e90": [[57, "id4"]], "\u5168\u6d41\u7a0b": [[58, "id3"]], "\u5168\u6d41\u7a0b\u6607\u817e\u5b9e\u8df5": [[14, null]], "\u5176\u4ed6\u53c2\u6570": [[46, "id4"]], "\u51c6\u5907 WeNet \u6570\u636e\u683c\u5f0f": [[64, "wenet"]], "\u51c6\u5907\u8bad\u7ec3\u6570\u636e": [[64, "id4"]], "\u5206\u5e03\u5f0f\u8bad\u7ec3": [[31, "id3"]], "\u521b\u5efa\u865a\u62df\u73af\u5883": [[56, "id3"]], "\u524d\u7f6e\u51c6\u5907": [[6, "id2"], [14, "id4"], [49, "id2"], [53, "id2"], [61, "id2"]], "\u529f\u80fd\u6837\u4f8b": [[39, null]], "\u52a0\u8f7d\u6570\u636e\u96c6": [[53, "id4"]], "\u52a0\u8f7d\u6a21\u578b": [[53, "id8"]], "\u52a8\u6001\u5408\u5e76 LoRA \u7684\u63a8\u7406": [[3, "id4"], [14, "lora"], [19, "id4"]], "\u5355\u5361/\u5206\u5e03\u5f0f\u8bad\u7ec3": [[52, "id2"]], "\u5355\u5361\u8bad\u7ec3": [[31, "id2"]], "\u5355\u673a\u591a\u5361\u5fae\u8c03": [[18, null]], "\u5378\u8f7dtransformers": [[56, "id5"]], "\u539f\u59cb\u6a21\u578b\u76f4\u63a5\u63a8\u7406": [[14, "id7"]], "\u53c2\u6570\u8bf4\u660e": [[46, "id2"]], "\u53ef\u89c6\u5316\u8bc4\u4f30\u7ed3\u679c": [[34, "id5"]], "\u542f\u52a8\u8bc4\u4f30": [[34, "id4"]], "\u542f\u52a8\u955c\u50cf": [[5, "id2"]], "\u56fe\u50cf\u5206\u7c7b": [[55, "id7"]], "\u56fe\u50cf\u5904\u7406": [[37, "id2"]], "\u56fe\u50cf\u5904\u7406\u7ed3\u679c": [[37, "id3"]], "\u56fe\u50cf\u8f6c\u56fe\u50cf": [[55, "id8"]], "\u56fe\u50cf\u8f6c\u6587\u672c": [[55, "id14"]], "\u56fe\u751f\u56fe": [[46, "id6"]], "\u5728\u7ebf\u670d\u52a1": [[22, "id11"]], "\u57fa\u4e8e LoRA \u7684 sft \u6307\u4ee4\u5fae\u8c03": [[14, "lora-sft"]], "\u57fa\u4e8e LoRA \u7684\u5fae\u8c03": [[3, "lora"]], "\u57fa\u4e8e LoRA \u7684\u6a21\u578b\u591a\u5361\u5206\u5e03\u5f0f\u5fae\u8c03": [[18, "lora"]], "\u57fa\u4e8e LoRA \u7684\u6a21\u578b\u5fae\u8c03": [[19, "lora"]], "\u591a\u5361 NPU \u6307\u5b9a": [[18, "npu"]], "\u591a\u6a21\u6001": [[55, "id12"]], "\u5b89\u88c5": [[22, "id3"]], "\u5b89\u88c5 Accelerate \u53ca\u4f9d\u8d56\u5305": [[5, "id3"]], "\u5b89\u88c5miniconda": [[45, "miniconda"]], 
"\u5b89\u88c5stable-diffusion-webui": [[45, "stable-diffusion-webui"]], "\u5b89\u88c5transformers": [[56, "transformers"]], "\u5b89\u88c5\u51c6\u5907": [[14, "id5"]], "\u5b89\u88c5\u5fc5\u8981\u5e93": [[53, "id3"]], "\u5b89\u88c5\u6307\u5357": [[2, null], [5, null], [9, null], [12, null], [17, null], [21, null], [24, null], [27, null], [30, null], [33, null], [36, null], [42, null], [45, null], [48, null], [51, null], [56, null], [60, null], [63, null], [66, null]], "\u5b89\u88c5\u6821\u9a8c": [[2, "id3"], [12, "id2"], [17, "id2"], [21, "id3"], [24, "id3"], [30, "id3"], [33, "id3"], [36, "id4"], [51, "id3"], [63, "id3"], [66, "id3"]], "\u5b8c\u6574\u811a\u672c": [[19, "id5"]], "\u5bfc\u5165 torch-npu": [[52, "torch-npu"]], "\u5bfc\u51fa\u8bad\u7ec3\u597d\u7684\u6a21\u578b": [[64, "id9"]], "\u5f00\u542f\u5fae\u8c03": [[19, "id3"]], "\u5fae\u4fe1\u7fa4": [[40, "id1"]], "\u5fae\u8c03/\u8bad\u7ec3\u62a5\u9519": [[15, "id3"]], "\u5fae\u8c03\u9884\u8bad\u7ec3\u6a21\u578b": [[53, null]], "\u5feb\u901f\u5b89\u88c5\u6607\u817e\u73af\u5883": [[7, null]], "\u5feb\u901f\u5f00\u59cb": [[3, null], [6, null], [10, null], [13, null], [19, null], [22, null], [25, null], [28, null], [31, null], [34, null], [37, null], [43, null], [46, null], [49, null], [52, null], [58, null], [61, null], [64, null], [67, null]], "\u6279\u91cf\u9884\u6d4b\u548c\u8bad\u7ec3\u6548\u679c\u8bc4\u4f30": [[14, "id10"]], "\u63a8\u7406": [[13, "id3"], [55, null]], "\u63a8\u7406\u53ca\u5fae\u8c03\u811a\u672c": [[19, "id6"]], "\u63a8\u7406\u62a5\u9519": [[15, "id2"]], "\u63d0\u53d6\u6700\u4f73 cmvn \u7279\u5f81\uff08\u53ef\u9009\uff09": [[64, "cmvn"]], "\u6587\u672c\u5206\u7c7b": [[55, "id10"]], "\u6587\u672c\u751f\u6210": [[55, "id11"]], "\u6587\u672c\u8f6c\u97f3\u9891": [[55, "id5"]], "\u6587\u751f\u56fe": [[3, "id2"], [46, "id5"]], "\u6607\u817e\u5f00\u6e90": [[0, null]], "\u6607\u817e\u73af\u5883\u5b89\u88c5": [[2, "id2"], [30, "id2"], [33, "id2"], [36, "id2"], [48, "id2"], [51, "id2"], [60, "id2"], [63, "id2"], [66, "id2"]], "\u6784\u5efa\u955c\u50cf": [[22, "id8"]], "\u6982\u89c8": [[34, "id2"]], "\u6a21\u578b\u51c6\u5907": [[28, "id3"]], "\u6a21\u578b\u53ca\u6570\u636e\u96c6\u4e0b\u8f7d": [[3, "download"]], "\u6a21\u578b\u63a8\u7406": [[28, "id7"], [52, "id4"]], "\u6a21\u578b\u6587\u4ef6\u51c6\u5907\u53ca\u91cf\u5316": [[13, "id2"]], "\u6a21\u578b\u83b7\u53d6": [[57, null]], "\u6a21\u578b\u8bad\u7ec3": [[6, "id3"], [61, "id3"], [64, "id5"]], "\u6a21\u578b\u8bc4\u4f30": [[31, "id5"], [53, "id10"]], "\u6a21\u578b\u9a8c\u8bc1": [[52, "id3"]], "\u6d4b\u8bd5\u63a8\u7406": [[64, "id7"]], "\u6e90\u7801\u7f16\u8bd1": [[36, "id3"]], "\u73af\u5883\u51c6\u5907": [[22, "id4"], [28, "id2"]], "\u73af\u5883\u53d8\u91cf\u914d\u7f6e": [[19, "id2"]], "\u751f\u6210 token \u5b57\u5178": [[64, "token"]], "\u76f4\u63a5\u4e0b\u8f7d": [[57, "id3"]], "\u79bb\u7ebf\u6279\u5904\u7406": [[22, "id10"]], "\u7c7b\u522b\u6807\u7b7e": [[28, "id5"]], "\u81ea\u52a8\u8bed\u97f3\u8bc6\u522b": [[67, "id6"]], "\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u6784\u5efa": [[14, "id8"]], "\u81ea\u7136\u8bed\u8a00\u5904\u7406": [[55, "id9"]], "\u89c6\u89c9\u95ee\u7b54": [[55, "id13"]], "\u8ba1\u7b97\u673a\u89c6\u89c9": [[55, "id6"]], "\u8bad\u7ec3": [[53, "id7"]], "\u8bbe\u5907\u6307\u5b9a": [[15, "id1"]], "\u8bed\u97f3\u6587\u4ef6\u9884\u5904\u7406": [[67, "id5"]], "\u8d85\u53c2\u6570\u8c03\u4f18": [[53, "id9"]], "\u8fdb\u9636-\u5927\u6a21\u578b\u4e3b\u6d41\u8bc4\u6d4b benchmark": [[14, "benchmark"]], "\u914d\u7f6e\u6587\u4ef6\u51c6\u5907": [[14, "id6"]], 
"\u914d\u7f6e\u8bc4\u4f30\u4efb\u52a1": [[34, "id3"]], "\u91cf\u5316": [[22, "id15"]], "\u95ee\u9898\u53cd\u9988": [[15, "id4"]], "\u97f3\u9891": [[55, "id3"]], "\u97f3\u9891\u8bc6\u522b": [[55, "id4"]], "\u9884\u5904\u7406\u6570\u636e\u96c6": [[53, "id5"]], "\u9884\u8bad\u7ec3\u5168\u6d41\u7a0b": [[53, "id11"]], "\u9a8c\u8bc1\u5b89\u88c5": [[56, "id4"]]}, "docnames": ["index", "sources/Diffusers/index", "sources/Diffusers/install", "sources/Diffusers/quick_start", "sources/accelerate/index", "sources/accelerate/install", "sources/accelerate/quick_start", "sources/ascend/quick_install", "sources/deepspeed/index", "sources/deepspeed/install", "sources/deepspeed/quick_start", "sources/llama_cpp/index", "sources/llama_cpp/install", "sources/llama_cpp/quick_start", "sources/llamafactory/example", "sources/llamafactory/faq", "sources/llamafactory/index", "sources/llamafactory/install", "sources/llamafactory/multi_npu", "sources/llamafactory/quick_start", "sources/lm_deploy/index", "sources/lm_deploy/install", "sources/lm_deploy/quick_start", "sources/lm_evaluation/index", "sources/lm_evaluation/install", "sources/lm_evaluation/quick_start", "sources/onnxruntime/index", "sources/onnxruntime/install", "sources/onnxruntime/quick_start", "sources/open_clip/index", "sources/open_clip/install", "sources/open_clip/quick_start", "sources/opencompass/index", "sources/opencompass/install", "sources/opencompass/quick_start", "sources/opencv/index", "sources/opencv/install", "sources/opencv/quick_start", "sources/pytorch/api_doc", "sources/pytorch/examples", "sources/pytorch/faq", "sources/pytorch/index", "sources/pytorch/install", "sources/pytorch/quick_start", "sources/sd_webui/index", "sources/sd_webui/install", "sources/sd_webui/quick_start", "sources/sentence_transformers/index", "sources/sentence_transformers/install", "sources/sentence_transformers/quick_start", "sources/timm/index", "sources/timm/install", "sources/timm/quick_start", "sources/transformers/fine-tune", "sources/transformers/index", "sources/transformers/inference", "sources/transformers/install", "sources/transformers/modeldownload", "sources/transformers/quick_start", "sources/trl/index", "sources/trl/install", "sources/trl/quick_start", "sources/wenet/index", "sources/wenet/install", "sources/wenet/quick_start", "sources/whisper_cpp/index", "sources/whisper_cpp/install", "sources/whisper_cpp/quick_start"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst", "sources/Diffusers/index.rst", "sources/Diffusers/install.rst", "sources/Diffusers/quick_start.rst", "sources/accelerate/index.rst", "sources/accelerate/install.rst", "sources/accelerate/quick_start.rst", "sources/ascend/quick_install.rst", "sources/deepspeed/index.rst", "sources/deepspeed/install.rst", "sources/deepspeed/quick_start.rst", "sources/llama_cpp/index.rst", "sources/llama_cpp/install.rst", "sources/llama_cpp/quick_start.rst", "sources/llamafactory/example.rst", "sources/llamafactory/faq.rst", "sources/llamafactory/index.rst", "sources/llamafactory/install.rst", "sources/llamafactory/multi_npu.rst", "sources/llamafactory/quick_start.rst", "sources/lm_deploy/index.rst", "sources/lm_deploy/install.rst", "sources/lm_deploy/quick_start.rst", "sources/lm_evaluation/index.rst", 
"sources/lm_evaluation/install.rst", "sources/lm_evaluation/quick_start.rst", "sources/onnxruntime/index.rst", "sources/onnxruntime/install.rst", "sources/onnxruntime/quick_start.rst", "sources/open_clip/index.rst", "sources/open_clip/install.rst", "sources/open_clip/quick_start.rst", "sources/opencompass/index.rst", "sources/opencompass/install.rst", "sources/opencompass/quick_start.rst", "sources/opencv/index.rst", "sources/opencv/install.rst", "sources/opencv/quick_start.rst", "sources/pytorch/api_doc.rst", "sources/pytorch/examples.rst", "sources/pytorch/faq.rst", "sources/pytorch/index.rst", "sources/pytorch/install.rst", "sources/pytorch/quick_start.rst", "sources/sd_webui/index.rst", "sources/sd_webui/install.rst", "sources/sd_webui/quick_start.rst", "sources/sentence_transformers/index.rst", "sources/sentence_transformers/install.rst", "sources/sentence_transformers/quick_start.rst", "sources/timm/index.rst", "sources/timm/install.rst", "sources/timm/quick_start.rst", "sources/transformers/fine-tune.rst", "sources/transformers/index.rst", "sources/transformers/inference.rst", "sources/transformers/install.rst", "sources/transformers/modeldownload.rst", "sources/transformers/quick_start.rst", "sources/trl/index.rst", "sources/trl/install.rst", "sources/trl/quick_start.rst", "sources/wenet/index.rst", "sources/wenet/install.rst", "sources/wenet/quick_start.rst", "sources/whisper_cpp/index.rst", "sources/whisper_cpp/install.rst", "sources/whisper_cpp/quick_start.rst"], "indexentries": {"_npu_dropout()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu._npu_dropout", false]], "built-in function": [[38, "npu_batch_nms", false], [38, "npu_bert_apply_adam", false]], "copy_memory_()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.copy_memory_", false]], "empty_with_format()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.empty_with_format", false]], "fast_gelu()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.fast_gelu", false]], "npu_alloc_float_status()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_alloc_float_status", false]], "npu_anchor_response_flags()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_anchor_response_flags", false]], "npu_apply_adam()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_apply_adam", false]], "npu_batch_nms()": [[38, "npu_batch_nms", false]], "npu_bert_apply_adam()": [[38, "npu_bert_apply_adam", false]], "npu_bmmv2()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_bmmV2", false]], "npu_bounding_box_decode()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_bounding_box_decode", false]], "npu_broadcast()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_broadcast", false]], "npu_ciou()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_ciou", false]], "npu_clear_float_status()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_clear_float_status", false]], "npu_confusion_transpose()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_confusion_transpose", false]], "npu_conv2d()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_conv2d", false]], "npu_conv3d()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_conv3d", false]], "npu_conv_transpose2d()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_conv_transpose2d", false]], 
"npu_convolution()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_convolution", false]], "npu_convolution_transpose()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_convolution_transpose", false]], "npu_deformable_conv2d()\uff08\u5728 torch_npu \u6a21\u5757\u4e2d\uff09": [[38, "torch_npu.npu_deformable_conv2d", false]]}, "objects": {"": [[38, 0, 1, "", "npu_batch_nms"], [38, 0, 1, "", "npu_bert_apply_adam"]], "torch_npu": [[38, 0, 1, "", "_npu_dropout"], [38, 0, 1, "", "copy_memory_"], [38, 0, 1, "", "empty_with_format"], [38, 0, 1, "", "fast_gelu"], [38, 0, 1, "", "npu_alloc_float_status"], [38, 0, 1, "", "npu_anchor_response_flags"], [38, 0, 1, "", "npu_apply_adam"], [38, 0, 1, "", "npu_bmmV2"], [38, 0, 1, "", "npu_bounding_box_decode"], [38, 0, 1, "", "npu_broadcast"], [38, 0, 1, "", "npu_ciou"], [38, 0, 1, "", "npu_clear_float_status"], [38, 0, 1, "", "npu_confusion_transpose"], [38, 0, 1, "", "npu_conv2d"], [38, 0, 1, "", "npu_conv3d"], [38, 0, 1, "", "npu_conv_transpose2d"], [38, 0, 1, "", "npu_convolution"], [38, 0, 1, "", "npu_convolution_transpose"], [38, 0, 1, "", "npu_deformable_conv2d"]]}, "objnames": {"0": ["py", "function", "Python \u51fd\u6570"]}, "objtypes": {"0": "py:function"}, "terms": {"00": [6, 7, 9, 13, 14, 49, 53, 61, 67], "000": [13, 43, 52, 67], "0000": [7, 38], "000000": 13, "000000039769": 55, "000010": 13, "0001": 19, "000946458": 52, "001": [10, 39, 43, 55], "0010": 31, "0010740706": 52, "0012754521": 52, "0019760131836": 61, "002": 55, "00210": 43, "003": 55, "0030552391": 52, "0041": 25, "0049": 31, "0070": 25, "0085": 25, "0088": 25, "0089": 25, "008s": 52, "01": [6, 13, 61], "011": 52, "0122": 25, "012223720550537": 61, "013": 55, "01375547": 49, "0143": 25, "015390396118164": 61, "01562478": 49, "0165100097656": 61, "0179": 25, "02": [6, 13, 14, 61], "0200": 25, "0212": 25, "0246": 25, "0250": 25, "026123": 53, "0268": 25, "0274658203125": 61, "0276": 25, "0278": 25, "0284": 25, "0286": 25, "0291": 25, "0293": 25, "03": [6, 7, 13, 22, 31, 38, 53, 61], "0307": 25, "0310": 25, "0312": 25, "03137118": 49, "0320": 25, "0321": 25, "0325": 25, "03337045": 49, "0334": 25, "0337": 25, "0343": 25, "0345": 25, "0351": 25, "0356": 25, "0361": 25, "0377": 25, "0383": 25, "0384": 25, "0387": 25, "0392": 25, "0398": 25, "04": [3, 5, 7, 13, 15, 52, 61], "040": 52, "0404": 25, "0406": 25, "0417": 25, "04230832": 49, "0429": 25, "04301599": 49, "0431": 25, "0436": 25, "0437": 25, "0438": 25, "04393559": 49, "0446": 25, "0451": 25, "0453": 25, "046": 55, "0469": 25, "0473": 25, "0479": 25, "0480": 25, "0482": 25, "04824848": 49, "0483": 25, "049": 61, "0493": 61, "0498": 25, "05": [6, 13, 15, 38, 43, 61, 67], "050": 13, "0501": 25, "0502": 25, "0503": 25, "05215353": 49, "05488579": 49, "0551": 61, "0557311197916666": 53, "0558": 38, "05615513": 49, "05640831": 49, "05903088": 49, "06": [34, 38, 53, 61], "06652435": 49, "07": [6, 34, 61], "073": 55, "075": 55, "076": 34, "08": [43, 49], "083s": 52, "08493122": 49, "0865316167473793": 61, "088": 55, "09556": 43, "0e": 13, "0f": [10, 43], "0x40000": 38, "0x7020010": 15, "10": [0, 2, 7, 9, 10, 12, 13, 15, 17, 19, 30, 33, 36, 38, 39, 42, 43, 45, 49, 51, 52, 56, 61, 63], "100": [3, 6, 10, 12, 13, 31, 34, 36, 38, 43, 49, 52, 53, 61, 66], "1000": [10, 19, 31, 43, 52, 53, 61], "10000": [10, 31, 43], "10029524": 49, "10205095": 49, "1024": [3, 13, 14, 19, 28, 31, 43], "1030": 13, "1056": 38, "10s": 15, "11": [13, 15, 17, 49, 53, 61, 67], "1102": 15, "1102681159973145": 61, "111": 13, 
"112": 13, "11419677734375": 61, "118": 13, "12": [6, 10, 13, 21, 34, 38, 43], "120": [10, 38, 43], "1218": 38, "123": 31, "12379668": 49, "124": 13, "125": [52, 67], "1260": 13, "127": [18, 19, 38], "1273": 38, "128": [13, 31], "128000": [13, 53], "128009": 13, "128256": 13, "13": [0, 13, 15, 31, 52, 53, 63, 67], "131": 67, "13152": 13, "13313": 13, "1355": 61, "13623": 13, "139": 61, "14": [9, 13, 17, 31, 34, 38, 43, 61], "143": 61, "14336": 13, "144": [55, 61], "147": 67, "15": [6, 10, 13, 43, 49, 53, 61, 67], "150": [14, 52], "1500": 67, "15000": 43, "1502880093958574": 57, "151": 38, "15169": 7, "1518": 38, "153": 67, "15317": 13, "15380859375": 61, "15474730730056763": 61, "1548289060592651": 61, "155628": 53, "157": [38, 52], "15777587890625": 61, "159": 38, "16": [10, 13, 14, 19, 38, 43, 55, 67], "16000": 67, "1607": 67, "162": 31, "1634": 14, "167": 15, "16g": [5, 17], "17": [13, 15, 42, 43, 67], "1725": 6, "1728907816": 13, "175": [31, 38], "1753845214844": 61, "176000": 67, "18": [13, 22, 34, 38, 67], "180": 37, "180000000": 19, "1812": 53, "1842": 15, "186": 38, "18659590184688568": 61, "188": 38, "1891": 52, "1895003467798233": 61, "19": [6, 13, 17, 61, 67], "192": [13, 67], "196": 14, "19788": 53, "1999": 43, "1_8b": 34, "1d": 38, "1d7fe4": 34, "1e": [3, 10, 31, 38, 43], "1f": 43, "1ubuntu1": 13, "20": [7, 9, 13, 14, 37, 39, 43, 52], "2000": [10, 43], "2020": 43, "20200220_120000": 34, "2023": 34, "20230220_183030": 34, "2024": [15, 43, 49], "204": 14, "2048": [13, 34], "204kb": 6, "21": [13, 15, 38, 43, 52, 53], "21268120": 52, "213kb": 6, "2168960571289": 61, "2185": 25, "21993154287338257": 61, "22": [6, 7, 13], "223": [38, 67], "224": [28, 31, 52, 55], "225": [28, 52], "226": 13, "2261535": 14, "22747": 53, "229": [28, 52], "23": [6, 7, 22, 34, 45], "2303": 43, "23089599609375": 61, "2312": 43, "2324": 38, "2359223c1837a7587402bda0f2643382a6eefeab": 55, "237kb": 6, "238": 13, "23mb": 6, "24": [13, 30], "24127893149852753": 61, "2425": 25, "242685556411743": 61, "2432083934545517": 61, "246370": 6, "247": 38, "2478": 38, "2496635913848877": 61, "25": [34, 37, 52], "250": 52, "253": 38, "255": [28, 38], "256": [13, 52, 58], "2563016414642334": 61, "2568": 15, "256x256": 31, "2573": 38, "26": [6, 15, 30, 33, 51], "26203155517578": 61, "27": [13, 43, 52, 53], "270": 37, "2700": 25, "2733": 38, "27545": 53, "27701": 6, "279": 53, "28": [13, 67], "2800": 25, "280147": 13, "283": 61, "2854": 67, "2857": 25, "288kb": 6, "289": 52, "29": [13, 15], "290": 15, "291": [13, 61], "29246845841407776": 61, "2941": 25, "29500": 39, "2963": 25, "298": 61, "2b": 22, "2d": 38, "2f": 43, "2vfqelfyfyji": 57, "30": [3, 10, 38, 43, 53, 67], "300i": 7, "300t": 12, "303": 31, "3031154274940491": 61, "3036": 25, "304": 53, "305": 61, "3058021068573": 61, "307": 61, "308k": 6, "31": [13, 17], "311": 53, "313": 61, "3131723403930664": 61, "3139": 53, "314": 61, "315": [53, 61], "3156": 53, "3158": 25, "319": 61, "32": [0, 6, 12, 13, 14, 31, 34, 38, 43, 67], "321538": 38, "3217742443084717": 61, "322": 61, "323": [53, 61], "32400": 38, "327": 61, "32768": 7, "33": [13, 31, 64], "3300": 25, "331": 61, "33126339316368103": 61, "332": 61, "3333": 25, "3350830078125": 61, "3378": 6, "34": 10, "343": 61, "344": 61, "346": 61, "35": [6, 13], "3503": 13, "3519": 25, "3520": 13, "353": 61, "3532": 25, "3553": 6, "3561025857925415": 61, "3572": 25, "36": 49, "3600": 25, "364": 61, "3647": 38, "3648269176483154": 61, "3668": 6, "3676": 25, "3689": 25, "369": 53, "369903326034546": 61, "37": 67, 
"3705289363861084": 61, "375": 53, "3761059045791626": 61, "3788": 15, "378931999206543": 61, "3819": 25, "3826913833618164": 61, "3839": 15, "384": 53, "3840": 15, "384589433670044": 61, "3883": 61, "3892": 25, "3901": 25, "393424": 34, "3935241699219": 61, "394": 52, "3947": 25, "3967204988002777": 61, "399s": 52, "3d": 38, "3e": [10, 43], "3f": [10, 43], "3kb": 6, "40": [7, 13, 15], "400": 13, "400s": 52, "406": [28, 52], "4062": 53, "4064": 25, "4077": 15, "408": 6, "4082": 15, "40937": 36, "4094": 38, "4096": 13, "41": [15, 17], "4106750488281": 61, "4141845703125": 61, "4145514667034149": 61, "4157": 25, "418": 52, "42": [3, 6, 53], "4228": 15, "4231": 25, "425": 55, "42580509185791": 61, "427": [31, 52], "4277": 25, "43": 67, "4300": 25, "430453300476074": 61, "4314": 25, "4325330853462219": 61, "4336": 25, "435": 52, "436467409133911": 61, "436kb": 6, "436m": 6, "438": 52, "44": 13, "4403": 38, "442": 55, "443": 6, "4444": 25, "448": 67, "45": 67, "4548392295837402": 61, "455843925476074": 61, "456": [28, 52], "4574": 25, "4581611156463623": 61, "45c443b316737a4ab6e40413d7794a7f5657c19f": 45, "45kb": 6, "4632484912872314": 61, "4643": 15, "4667": 25, "4679": 25, "4682": 52, "47": 53, "4700": 25, "4704": 15, "4724": 25, "4724509716033936": 61, "4759": 25, "4765": 52, "479": 55, "4796": 25, "48": [15, 43], "4806": 25, "4807929992675781": 61, "48211a1594f1321b00f14c9f7a5b4813144b2fb9": 45, "485": [28, 52], "4857": 25, "4872": 38, "487535": 14, "4897": 25, "49": [6, 10, 13], "4912": 25, "4922": 25, "4929": 52, "4953": 25, "499000": 53, "4d": 38, "4f": 28, "4kb": 6, "50": [13, 14, 34, 52], "500": [3, 10, 19, 43], "5000": [25, 38], "50000": 53, "500000": 13, "50000000": [10, 43], "504": 55, "5049": 25, "507018": 15, "5101": 25, "5109": 6, "5115": 25, "5118070840835571": 61, "512": [13, 43, 52, 67], "513734817504883": 61, "5160": 25, "5185": 25, "51864": 67, "52": [10, 14], "527": 53, "53": [13, 15], "530369997024536": 61, "5327": 25, "5376": 25, "539": 53, "54": [13, 14, 15], "5400": 25, "5430492162704468": 61, "55": [14, 34, 53], "5519": 53, "55230712890625": 61, "5593773722648621": 61, "56": [10, 34, 67], "5637512207031": 61, "5697": 25, "57": [10, 15], "570b": 6, "5725": 25, "574000": 53, "576409999999996": 14, "5769": 61, "5799999833106995": 61, "5823": 25, "59": 10, "590000": 53, "5967": 61, "5991": 38, "5b": [25, 34], "5d": [10, 43], "5kb": 6, "5s": [10, 43], "60": [14, 38], "6089": 61, "61": 14, "6119": 25, "6126": 61, "6155031323432922": 61, "6159": 61, "6191915273666382": 61, "61it": 49, "62": 31, "62135": 61, "6214": 25, "6220334768295288": 61, "6225000023841858": 61, "623": 49, "6248273253440857": 61, "625": 61, "6269999742507935": 61, "6284": 61, "62930297851562": 61, "63": 52, "632": 67, "635": 61, "6392844319343567": 61, "6398493647575378": 61, "64": [7, 13, 15, 31, 34, 42, 52, 55], "6439999938011169": 61, "645": 15, "6460000276565552": 61, "649k": 6, "65": [10, 13, 14, 31], "650000": 53, "6575000286102295": 61, "6594": 52, "6598": 61, "66": [10, 61], "6600000262260437": 61, "6624": 25, "6694": 25, "67": [10, 14], "6700000166893005": 61, "68": 67, "6800": 25, "6840": 38, "68mb": 6, "69": 13, "6984029412269592": 61, "699462890625": 61, "699999988079071": 61, "69kb": 6, "6f7db241d2f8ba7457bac5ca9753331f0c266917": 45, "7007": [14, 18, 19], "70209": 15, "708": 52, "7085975": 14, "7173767089844": 61, "7176669239997864": 61, "719s": 52, "72": [6, 36, 67], "7240013480186462": 61, "73": [14, 61], "7300000190734863": 61, "73426": 6, "736572265625": 61, "74": 61, "7424671645634816e": 
61, "75": [6, 34, 38, 52, 61, 67], "750": 52, "7577209472656": 61, "75kb": 6, "76": 61, "766357421875": 61, "768": 31, "76898193359375": 61, "77": [6, 38], "7733": 38, "774658768993046e": 61, "7790046334266663": 61, "7793089151382446": 61, "78": [52, 61], "7860": 17, "789354427392000": 53, "79": [6, 14, 38], "7987264394760132": 61, "7b": [13, 14, 19, 22, 55, 57], "7e5la4178elcjcu": 57, "7eoxlqbqcsdqmcx0ve8oia3qej": 57, "7eoyk38o9nnlkxwoka7yqxwvuvrqibvmjyhke8x": 57, "7ermo7m6ii595puox7o3bvypfyqf1syrp05xcr9t2": 57, "7k": 6, "80": [61, 67], "800": [3, 13], "8000": [13, 17], "800t": [7, 22], "801": 53, "8014705882352942": 6, "806850373422611e": 61, "81": 34, "8130187988281": 61, "814": 53, "8192": 13, "81kb": 6, "83": [14, 67], "8312": 43, "8355176448822021": 61, "839041977852176e": 61, "84": [10, 43, 61], "8439306358381503": 6, "85": 61, "8571": 38, "8578431372549019": 6, "86": 52, "87": [52, 61, 67], "8700980392156863": 6, "87123358228174e": 61, "875": 52, "878154695": 52, "88": 61, "882": 7, "89": 52, "892": 61, "89263": 53, "893": 61, "8975265017667845": 6, "8978271484375": 61, "8b": [13, 22, 34, 53, 55, 57, 58], "8e707118": 13, "8kb": 6, "90": [17, 37, 61, 66], "9031982421875": 61, "903425186711305e": 61, "9074": 13, "9087779690189329": 6, "91": 6, "910b": [5, 22], "9117431640625": 61, "91485": 53, "91998291015625": 61, "92": 53, "926": 52, "927000": 15, "93": 52, "930": 67, "93561679114087e": 61, "94": 67, "941235": 14, "942": 52, "95": [34, 66, 67], "950": 13, "96": [13, 52], "960": 52, "96641540527344": 61, "967808395570435e": 61, "97": [12, 13, 31, 52], "975": 53, "98": [12, 38, 52], "99": [38, 67], "9941": 31, "99441528320312": 61, "994618": 53, "999": [10, 43], "9998704791069031": 56, "__": [10, 43], "__call__": 6, "__init__": [10, 39, 43], "__main__": [10, 14, 22, 28, 39, 43], "__name__": [10, 14, 22, 28, 39, 43], "__post_init__": 43, "__version__": [21, 30, 33, 43, 51, 63], "_before_": 43, "_glibcxx_use_cxx11_abi": 42, "_lazymodul": 43, "_local_rank": [10, 43], "_most_": 43, "_mp_fn": 43, "_npu_dropout": [38, 41], "_orig_mod": 43, "_sampl": 15, "_subclass": 38, "a2": [7, 12, 22], "aarch64": [7, 13, 17, 22, 42, 45], "ab527a9a6d347f364e3d185ba6d714e22d80cb3c": 45, "abl": [6, 56], "about": [37, 43], "abov": 43, "abs": 43, "absl": [7, 30, 33, 51], "abspath": 43, "abstract_algebra": 25, "ac": 67, "acc": [25, 52], "acceler": [0, 6, 7, 10, 14, 17, 33, 43, 53], "accelerator_project_config": 43, "acceleratorst": 43, "accelr": 6, "accept": [10, 43], "accord": 43, "account": [13, 43], "accumul": 43, "accur": 43, "accuraci": [6, 10, 34, 43, 53, 61], "acl": 15, "acl_format": 38, "across": 43, "action": [10, 43], "activ": [2, 17, 21, 30, 33, 36, 45, 51, 56, 63], "actual": 15, "adam": [10, 38, 43], "adam_beta1": 43, "adam_beta2": 43, "adam_epsilon": 43, "adam_mod": 38, "adam_weight_decay": 43, "adamw": 43, "adamw8bit": 43, "adapt": [13, 43], "adapter_name_or_path": [14, 19], "add": [37, 43], "add_argu": [10, 37, 43], "add_config_argu": [10, 43], "add_generation_prompt": 58, "add_imag": 43, "add_nois": 43, "adding": 67, "addit": 38, "addr": 15, "adjust": 43, "advic": 58, "affair": 53, "after": [6, 15, 43, 53], "afterward": 43, "ag": 7, "again": [15, 43], "against": 43, "agre": 43, "ai": [0, 7, 14, 45, 46], "aicor": 7, "aishel": 64, "aishell_data_prep": 64, "all": [10, 43, 49], "all_result": 14, "allgather_bucket_s": [10, 43], "allgather_partit": [10, 43], "alloc": 43, "allow": 43, "allow_tf32": 43, "along": 43, "alpaca_en_demo": [14, 19], "alpaca_gpt4_zh": 14, "alpha001": 17, "alpha002": 9, 
"alpha003": 63, "alreadi": [6, 43], "also": 43, "altern": 52, "alway": [43, 58], "american": 67, "amp": [31, 43, 52], "amper": 43, "an": [10, 14, 43], "anaconda": 45, "analysi": 56, "anatomi": [25, 46], "anchor": 38, "and": [3, 6, 10, 13, 14, 15, 24, 30, 31, 33, 37, 39, 43, 51, 53, 56, 58, 61, 67], "anger": 53, "ani": [43, 52, 58], "anoth": 43, "answer": 55, "anyth": 43, "anywher": 53, "apach": 43, "api": [15, 16, 33, 37, 41, 61], "api_impl": 15, "api_key": 14, "api_port": 14, "api_serv": 22, "apolog": 53, "app": [12, 17], "append": [10, 14, 43], "appli": 13, "applic": [10, 14, 43, 49], "apply_chat_templ": [24, 58], "apt": 7, "ar": 67, "arch": [7, 13], "architectur": 13, "archiv": 45, "are": [6, 10, 14, 22, 43, 49, 53, 55, 56, 58, 61], "arena_extend_strategi": 28, "arg": [10, 17, 37, 39, 43, 53], "argc": 37, "argmax": [28, 43, 53], "argpars": [10, 31, 37, 43], "argument": [10, 43], "argumentpars": [10, 37, 43], "argv": [37, 43], "arm_fma": [13, 67], "around": 43, "arr": 13, "array": 28, "artifici": 14, "arxiv": 43, "as": [10, 28, 31, 37, 39, 43, 49, 52, 53, 58], "asap": 55, "asarray": 43, "ascend": [5, 7, 12, 14, 17, 22, 27, 28, 37, 39, 45, 48, 60], "ascend_cann": 9, "ascend_instal": [5, 17], "ascend_rt_visible_devic": [14, 15, 18, 19, 64], "ascend_visible_devic": 22, "ascendai": [2, 5, 12, 17, 36], "asctim": 43, "ask": [53, 58, 67], "asr": [64, 67], "asr_dummi": 55, "assert": [15, 39, 43], "assertionerror": 15, "asset": 45, "assign": 43, "assist": [14, 55, 58], "astronomi": 25, "astyp": [28, 37], "async_io": 9, "at": [6, 9, 10, 15, 37, 39, 43, 53, 56, 61], "atan_sub_flag": 38, "atb": 22, "atlas": [7, 12, 22], "attent": 13, "attention_mask": 53, "attn": [13, 67], "attr": [7, 30, 33, 51], "attribut": [15, 43], "attributeerror": 15, "audienc": 13, "audio": 55, "aura": 3, "authent": 43, "author": [14, 43], "auto": [14, 24, 34, 43, 53, 58], "auto_awq": 22, "autocast": [31, 43], "autocast_ctx": 43, "autoconfig": 43, "autodispatchbelowadinplaceorview": 61, "autodispatchbelowautograd": 61, "autoencoderkl": 43, "automat": [43, 55, 64, 67], "automatic1111": 45, "automodelforcausallm": [43, 53, 54], "automodelforsequenceclassif": 56, "autononvariabletypemod": 61, "autotoken": [43, 53, 56, 58], "avail": [43, 52, 56], "averag": 14, "average_checkpoint": 64, "average_num": 64, "avg_loss": 43, "avoid": [6, 43, 53], "avx": [13, 67], "avx2": [13, 67], "avx512": [13, 67], "avx512_bf16": 13, "avx512_vbmi": 13, "avx512_vnni": 13, "avx_vnni": 13, "away": 43, "awk": 18, "axi": [28, 53], "b86k": 31, "bac009s0002w0122": 64, "back": [43, 53], "backend": [10, 22, 31, 43, 67], "backend_config": 22, "backpropag": 43, "backward": [10, 38, 39, 43], "bad": [46, 53], "bar": 43, "bark": 55, "barrier": [10, 43], "base": [3, 6, 10, 34, 38, 43, 55, 56, 67], "base_model": 43, "base_op": 15, "base_url": 14, "basenam": 43, "bash": [5, 17, 22, 45, 64], "bashrc": [7, 57], "bashrcsourc": 7, "bashsudo": 7, "basi": 43, "basicconfig": 43, "batch": [10, 31, 34, 38, 43, 49, 52, 53], "batch_correct": [10, 43], "batch_siz": [10, 14, 24, 25, 31, 38, 43], "batchd": 67, "bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b": 45, "bbox": 38, "be": [6, 9, 10, 12, 13, 14, 15, 43, 53, 55, 56, 58, 61], "beam": 67, "bearer": 43, "been": [6, 43], "befor": [6, 43], "begin_of_text": 13, "behavior": 43, "behind": [43, 53], "being": 43, "beit": 55, "below": [43, 61], "bench": 12, "benchmark": 16, "benefit": 43, "bert": [6, 56], "bertforsequenceclassif": [6, 56], "berttokenizerfast": 6, "best": 67, "beta": [10, 43, 55], "beta1": [22, 38, 43], "beta1_pow": 38, 
"beta2": [38, 43], "beta2_pow": 38, "better": 43, "between": 43, "bf16": [10, 43], "bfloat16": [10, 43, 53, 58], "bfloat16_en": [10, 43], "bias": [6, 38, 53, 56], "bicub": 52, "bilinear": 43, "bin": [5, 7, 12, 13, 17, 31, 36, 43, 45, 52, 55, 64, 66, 67], "binari": 45, "bird": [10, 43, 55], "bit": [43, 67], "bitsandbyt": 43, "blas": [13, 67], "blip": [3, 43, 45], "block": 43, "block_count": 13, "block_siz": 43, "blog": 43, "blood": 53, "bnb": 43, "board": [16, 53], "bodi": 52, "bool": [38, 43], "bos": 13, "bos_token_id": 13, "both": 43, "box": 38, "box1": 38, "box2": 38, "boxes1": 38, "boxes2": 38, "bpe": 13, "bpw": 13, "branch": [24, 43, 45, 52], "break": 43, "bright": 52, "brokenpipeerror": [10, 43], "brown": [49, 52], "bsz": 43, "buffer": [13, 67], "bug": 15, "build": [12, 13, 17, 22, 27, 36, 61, 66, 67], "build_doc": 36, "build_exampl": 36, "build_opencv_dnn": 36, "build_opencv_fac": 36, "build_opencv_features2d": 36, "build_opencv_wechat_qrcod": 36, "build_opencv_xfeatures2d": 36, "build_shared_lib": 27, "builder": 6, "built": [12, 13, 36, 66], "bus": 7, "business_eth": 25, "but": [39, 43, 53, 55], "by": [6, 14, 43, 49], "c1": 7, "c10": [15, 61], "cach": [13, 17, 43, 67], "cache_dir": [31, 43, 57], "cache_request": 24, "caida": 55, "calcul": 43, "call": [6, 15, 43], "can": [6, 10, 12, 13, 43, 49, 61, 67], "candidate_label": 55, "cann": [2, 5, 7, 12, 13, 14, 17, 24, 27, 30, 33, 36, 37, 42, 48, 51, 60, 63, 66, 67], "cann_interfac": 37, "cann_vers": 7, "cannexecutionprovid": 28, "cannot": [22, 43], "cap": 52, "capac": [10, 43], "capacity_factor": [10, 43], "capit": 55, "caption": [3, 43, 55], "caption_column": [3, 43], "capturelogg": 43, "car": [10, 43], "carrion": 52, "case": [6, 36, 43, 61], "cashier": 53, "cast": 43, "cat": [7, 10, 28, 31, 43, 46], "caus": 61, "causal": [13, 43], "cc": [13, 15], "cchfjzgdpcmlqjhpk6qfck4twgmdyxmg__": 57, "cd": [6, 12, 17, 21, 22, 36, 45, 49, 57, 61, 64, 66], "ceil": 43, "center": [14, 43], "center_crop": 43, "centercrop": 43, "cf": 43, "cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf": 45, "cffi": [7, 30, 33, 51], "cfg": 13, "chain": 43, "chanc": 43, "chang": [14, 27, 43], "change_coordinate_fram": 38, "channel": 43, "char": 37, "chat": [14, 19, 22, 34, 55], "chat_templ": 13, "chatbot": 58, "chatcompletionmessag": 14, "chatglm": 15, "check": 43, "check_env": 22, "check_integr": 24, "check_min_vers": 43, "checkout": 45, "checkpoint": [3, 6, 15, 43, 46, 52, 56], "checkpointing_step": [3, 43], "checkpoints_total_limit": 43, "china": 22, "chines": 14, "chip": [7, 14], "choic": [10, 14, 43], "choos": [13, 43], "chore": 58, "chosen": [43, 61], "chunk": 43, "cifar": [10, 43], "cifar10": [10, 43], "ciou": 38, "ckpt": [3, 43], "cl": 43, "cl1wvzg93bmxvywq2lmxsyw1hbwv0ys5uzxrclyoilcjdb25kaxrpb24ionsirgf0zuxlc3nuagfuijp7ikfxuzpfcg9jafrpbwuioje3mty0mzyymtf9fx1dfq__": 57, "clang": 42, "class": [10, 28, 39, 43], "class_correct": [10, 43], "class_tot": [10, 43], "classes_path": 28, "classic": 55, "classif": [10, 43, 55], "classifi": [6, 43, 55, 56], "classnam": 43, "clavariacea": 52, "clean": 58, "cleaner": 43, "clear": 53, "cli": [12, 13, 14, 15, 17, 19, 43, 57], "client": [14, 15], "clinical_knowledg": 25, "clip": [30, 31, 45], "clip_grad_norm_": 43, "cliptextmodel": 43, "cliptoken": 43, "clm": 43, "clone": [6, 12, 17, 21, 22, 36, 45, 49, 57, 61, 66], "cls_index": 43, "cluster": 52, "cmake": [7, 12, 36, 66], "cmake_build_typ": 36, "cmake_install_prefix": 36, "cmakefil": 61, "cms": 13, "cmvn": 62, "cn": [2, 5, 6, 7, 14, 17, 18, 19, 30, 33, 48, 51, 56, 60], "co": [43, 55, 57, 
67], "coco": 55, "cocodataset": 55, "code": [15, 43, 61], "col": 37, "collat": 43, "collate_fn": 43, "collect": 43, "college_biolog": 25, "college_chemistri": 25, "college_computer_sci": 25, "college_mathemat": 25, "college_medicin": 25, "college_phys": 25, "color": 52, "column": 43, "column_nam": 43, "com": [6, 12, 15, 17, 21, 22, 24, 25, 36, 43, 45, 49, 57, 60, 61, 66, 67], "combin": 43, "come": 43, "comet_ml": 43, "comfort": 58, "command": [18, 43], "commandlinepars": 37, "comment": 43, "commit": 43, "commit_messag": 43, "communiti": 43, "compat": 9, "compil": [9, 15, 43], "compiler_depend": [49, 61], "complet": 14, "complianc": 43, "compon": 3, "compos": [10, 17, 43], "comput": [13, 43, 49, 53, 55, 61, 67], "compute_cmvn_stat": 64, "compute_dream_and_update_lat": 43, "compute_metr": [43, 53], "compute_snr": 43, "computer_secur": 25, "computing_embed": 49, "compvi": 43, "concaten": 43, "concatenated_exampl": 43, "conceptual_phys": 25, "concern": 43, "concurr": 43, "conda": [2, 17, 21, 30, 33, 36, 42, 44, 51, 56, 63], "condit": 43, "conf": 43, "config": [6, 10, 12, 27, 34, 43, 55, 57], "config_kwarg": 43, "config_map": 43, "config_nam": 43, "config_overrid": 43, "configur": [10, 43, 52], "conflict": 33, "conserv": 24, "constant": [3, 43], "constant_with_warmup": 43, "construct": 39, "consumpt": 43, "contain": [7, 43], "content": [13, 14, 43, 55, 58], "context": 43, "context_length": 13, "contextlib": 43, "contextmanag": 43, "contiguous_format": 43, "contiguous_gradi": [10, 43], "contrib": 38, "control": 53, "conv": 67, "conv1": [10, 43], "conv2": [10, 43], "conv2d": [10, 43], "convers": [10, 43, 55], "convert": [10, 43, 67], "convert_hf_to_gguf": 13, "convert_tokens_to_id": 58, "convolut": [10, 38, 43], "copi": 43, "copy_memory_": [38, 41], "copy_to": 43, "copyright": 43, "coral": 52, "coreml": 67, "correct": [10, 43], "correct_count": 43, "correct_pr": 43, "correspond": 43, "cosin": [19, 43, 52], "cosine_with_restart": 43, "could": [43, 61], "count": [10, 18, 43, 52], "countri": 67, "cours": 43, "cpp": [0, 13, 15, 67], "cpu": [2, 7, 13, 15, 30, 33, 36, 37, 38, 42, 43, 45, 48, 51, 53, 56, 58, 60, 63, 66, 67], "cpu_adagrad": 9, "cpu_adam": 9, "cpu_lion": 9, "cpu_offload": [10, 43], "cpuexecutionprovid": 28, "creat": [2, 10, 13, 14, 17, 21, 30, 33, 36, 39, 43, 45, 51, 52, 56, 63], "create_model_and_transform": 31, "create_model_card": 43, "create_moe_param_group": [10, 43], "create_repo": 43, "creation": 43, "creativeml": 43, "creatur": 3, "criterion": [10, 43], "critic": 24, "crop": 43, "crop_mod": 52, "crop_pct": 52, "cross": [10, 43, 67], "crossentropyloss": [10, 43], "crosslab": 43, "crowsonkb": 45, "csrc": 15, "csv": [31, 34, 43], "ctc": 64, "ctrl": [14, 19, 34, 43], "ctx": 13, "cuda": [9, 15, 21, 31, 37, 39, 43, 45, 67], "current": [6, 43, 49, 52], "current_datetime_hostnam": 43, "current_devic": [30, 33, 51, 63], "custom": [13, 14, 43, 53], "cute": [3, 46], "cutoff_len": [14, 19], "cv": 37, "cv2": 37, "cxx": [12, 66], "cxx11_abi": 42, "d50d76daa670286dd6cacf3bcd80b5e4823fc8e1": 45, "data": [3, 6, 10, 13, 14, 17, 22, 31, 43, 52, 64], "data_aishel": 64, "data_arg": 43, "data_col": 43, "data_dir": 43, "data_fil": 43, "data_ptr": 43, "dataclass": 43, "datacollatorwithpad": 43, "datacomp": 31, "dataformat": 43, "datafram": 43, "dataload": [10, 43], "dataloader_num_work": 43, "dataset": [6, 10, 14, 17, 19, 33, 34, 43, 52, 53, 55, 61], "dataset_arg": 43, "dataset_column": 43, "dataset_config_nam": 43, "dataset_dir": 14, "dataset_nam": [3, 43], "dataset_name_map": 43, "dataset_tag": 43, 
"datatrainingargu": 43, "datatyp": [10, 43], "datefmt": 43, "davinci0": [12, 17], "davinci7": 5, "davinci_manag": [5, 12, 17], "day": [53, 55, 58], "dcmake_build_typ": 12, "dcmi": [5, 12, 17], "ddp": 19, "ddp_model": 39, "ddp_timeout": 19, "ddpmschedul": 43, "deadlock": 6, "debug": [24, 27, 34, 43], "decay": 43, "decenc": 53, "decent": 53, "decod": [58, 67], "decor": [7, 30, 33, 51], "deep": 14, "deepspe": [0, 15, 18, 19, 41, 63], "deepspeed_not_impl": 9, "deepspeed_plugin": 43, "deepspeed_zero_init_disabled_context_manag": 43, "deepspeedengin": [10, 15, 43], "deer": [10, 43], "def": [10, 28, 31, 39, 43, 53], "default": [10, 13, 27, 34, 38, 39, 43, 49, 53], "default_data_col": 43, "defin": [10, 13, 39, 43], "deform": [38, 46], "deformable_group": 38, "deformableconv2d": 38, "del": 43, "delet": 24, "delight": 58, "delta": 38, "demo": 34, "demo_gsm8k": 34, "demo_gsm8k_base_gen": 34, "demo_gsm8k_chat_gen": 34, "demo_math": 34, "demo_math_base_gen": 34, "demo_math_chat_gen": 34, "dep": 45, "depend": [9, 30, 33, 43, 51], "deploy": 21, "deprec": [43, 61], "deriv": 43, "desc": 43, "describ": [22, 43], "descript": [10, 37, 43], "design": [3, 13, 14, 58], "detach": 43, "detail": [3, 10, 43, 52, 61], "detect": [14, 18, 43, 61], "dev": [5, 7, 9, 12, 17, 18, 45], "dev0": [17, 43, 51, 63], "devel": 7, "develop": [14, 24], "devic": [5, 10, 12, 13, 14, 15, 17, 22, 24, 25, 30, 31, 33, 38, 39, 43, 51, 52, 53, 55, 56, 58, 63], "device_count": 39, "device_id": [13, 28, 39], "device_map": [34, 53, 55, 58], "device_nam": [10, 43, 49], "device_typ": 22, "deviceid": 15, "devicetyp": 15, "devmm_svm": [5, 12, 17], "devop": 12, "dggml_cann": 12, "dh": 38, "diagram": 31, "dict": 43, "dictionari": [10, 43], "did": 43, "didn": 53, "differ": [13, 33], "diffus": [0, 3, 41, 46], "diffusionpipelin": [2, 3, 43], "dilat": 38, "dim": [31, 43], "dimens": 43, "dimension_count": 13, "diou": 38, "dir": [22, 24, 43, 57, 61], "directori": [37, 43], "disabl": [6, 43], "discuss": 43, "display": 28, "dist": [15, 31, 39], "dist_in22k_ft_in1k": 52, "distinct": 43, "distribut": [10, 15, 37, 39, 43], "distributed_train": 52, "distributeddataparallel": 39, "dit": 17, "divid": 43, "do": [13, 43, 53, 58, 67], "do_ev": 43, "do_predict": 14, "do_sampl": [15, 34, 55, 58], "do_train": [19, 43], "doc": [14, 43], "docker": [5, 7, 12, 15, 17, 18, 22, 42], "docker_buildkit": 22, "dockerfil": [12, 17, 22], "dockerfile_aarch64_ascend": 22, "docqueri": 55, "document": [14, 43], "doe": 43, "doesn": 43, "dog": [10, 31, 43, 49], "doing": 43, "domain": 13, "domest": 58, "don": 43, "done": [12, 13], "down": [6, 56], "download": [6, 10, 43, 45, 57, 67], "download6": 57, "dp": 39, "dpm": 46, "dpo": [0, 61], "dpo_train": 61, "dpython3_execut": 36, "dpython_librari": 36, "dragon": 3, "dream": [43, 55], "dream_detail_preserv": 43, "dream_train": 43, "driver": [5, 7, 12, 15, 17], "drop": 43, "dropout": 38, "ds": 9, "ds_build_op": 9, "ds_config": [10, 43], "ds_report": 9, "ds_z0_config": 19, "dst": 38, "dtw": 67, "dtype": [10, 37, 38, 43], "due": 43, "dump": 13, "dure": 43, "dw": 38, "dwith_cann": 36, "dwith_cuda": 36, "dx": 38, "dy": 38, "each": [10, 33, 43, 49, 55], "eager": 22, "eager_mod": 22, "earthstar": 52, "easi": 43, "easier": 58, "easili": 43, "eaten": 53, "echo": [7, 18, 57], "econometr": 25, "edg": 61, "edibl": 52, "edu": [2, 5, 6, 7, 14, 17, 18, 19, 30, 33, 48, 51, 56, 60], "effici": [14, 43, 58], "egg": 15, "either": [6, 43], "ej0001": 15, "electrical_engin": 25, "eleg": 3, "elementary_mathemat": 25, "eleutherai": 24, "elif": [10, 31, 43], "els": 
[10, 18, 43, 53, 56, 58], "ema": 43, "ema_unet": 43, "emamodel": 43, "emaon": 46, "embed": [0, 43, 49], "embed_dim": 31, "embedding_length": 13, "embedding_s": 43, "empti": 43, "empty_cach": 43, "empty_with_format": [38, 41], "en": [14, 43, 55, 67], "enabl": [10, 15, 43, 49, 52, 61], "enable_cann_graph": 28, "enable_default_handl": 43, "enable_explicit_format": 43, "enable_gradient_checkpoint": 43, "enable_xformers_memory_efficient_attent": 43, "encod": [6, 39, 43, 67], "encode_imag": 31, "encode_text": 31, "encoded_input": 53, "encoder_hidden_st": 43, "end": [43, 55], "end_train": 43, "endswith": 43, "energi": [14, 43], "engin": [3, 10, 43], "enjoy": 43, "ensur": 58, "entropi": [10, 43], "enumer": [10, 43], "env": [9, 15, 17, 36, 43], "env_local_rank": 43, "environ": [6, 9, 10, 14, 33, 39, 43], "eos": 13, "eos_token_id": [13, 58], "eot": 13, "eot_id": [13, 58], "ep": [10, 43], "ep_siz": [10, 43], "ep_world_s": [10, 43], "epoch": [6, 10, 31, 43, 52, 53, 61], "epoch_": 43, "eps": [10, 43], "epsilon": [38, 43], "err00001": 15, "err02200": 15, "error": [6, 15, 24, 43], "error_message_manag": 15, "estim": 61, "etc": [5, 7, 17, 43], "euler": 46, "eulerosv2r10": 17, "eval": [10, 13, 14, 19, 24, 31, 34, 43, 61], "eval_base_demo": 34, "eval_chat_demo": 34, "eval_dataset": [43, 53], "eval_logit": 61, "eval_logp": 61, "eval_loss": [43, 61], "eval_pr": [43, 53], "eval_reward": 61, "eval_runtim": 61, "eval_sampl": 43, "eval_samples_per_second": 61, "eval_step": 19, "eval_steps_per_second": 61, "eval_strategi": 53, "evalu": [6, 23, 25, 33, 43, 53], "evaluation_strategi": 19, "evalut": 0, "even": 53, "everi": [10, 43], "everyth": [43, 53], "exampl": [3, 6, 10, 19, 39, 43, 49, 53, 61, 64], "except": 43, "exclud": 43, "exec": [17, 22], "execut": [12, 15, 43, 66], "exist": 43, "exist_ok": 43, "exit": [19, 24], "exp": 43, "exp_avg": 38, "expand_dim": 28, "expandable_seg": 49, "expect": [43, 53], "expens": 43, "experi": 53, "expert": [10, 43], "explan": 43, "explicit": 6, "export": [3, 6, 14, 18, 19, 25, 42, 43, 49, 57, 64], "export_devic": 14, "export_dir": 14, "export_jit": 64, "export_legacy_format": 14, "export_s": 14, "expos": 43, "express": [43, 58], "extens": [9, 43, 48, 60], "extra": [43, 67], "extrem": [3, 43], "eyjtdgf0zw1lbnqiolt7invuaxf1zv9oyxnoijoibgjuyxc0bzdry2pqnnoxexz1n3hmcmnviiwiumvzb3vyy2uioijodhrwczp": 57, "f1": 6, "f16": 13, "f16c": [13, 67], "f32": 13, "f_clamp_kqv": 13, "f_logit_scal": 13, "f_max_alibi_bia": 13, "f_norm_ep": 13, "f_norm_rms_ep": 13, "face": [13, 52, 61], "factor": [38, 43], "factori": [0, 12, 14, 15, 18, 19], "fail": [15, 53], "fake_tensor": 38, "faketensor": 38, "faketensormod": 38, "fallback": 67, "fals": [6, 10, 14, 15, 17, 22, 38, 43, 49, 55, 64], "famili": 52, "faq": [16, 34, 41], "fast": [6, 43], "fast_gelu": [38, 41], "faster": [6, 43], "fasterrcnn": 38, "fc1": [10, 43], "fc2": [10, 43], "fc3": [10, 43], "fc4": [10, 43], "featmap_s": 38, "featur": [3, 10, 43, 49], "feature_extractor": 55, "feed": [10, 43], "feed_forward_length": 13, "feel": 58, "fellow": 67, "felt": 53, "fewshot": 14, "fewshot_as_multiturn": 24, "ffmpeg": 67, "fi": 18, "field": 43, "file": [15, 22, 24, 31, 37, 43, 57], "file_typ": 13, "filenam": 57, "filesystem": 43, "fill": 37, "filter": [10, 25, 43], "filter_height": 38, "filter_width": 38, "final": 43, "finalizeacl": 37, "fine": 43, "finetun": [3, 19, 43], "finetuned_from": 43, "finetuning_typ": [14, 19], "finish": [10, 43], "firmwar": 7, "first": [10, 43], "first_epoch": 43, "five": 53, "fix": [3, 14, 43], "flac": 55, "flag": 43, "flash": 67, 
"flash_attn": 13, "flatten": 43, "fleshi": 52, "flip": [37, 43], "float": [38, 43, 61], "float16": [2, 3, 38, 43], "float32": [10, 28, 38, 43, 52], "flop_count": 31, "flopcountermod": 31, "fma": [13, 67], "folder": 43, "folder_path": 43, "follow": [6, 10, 43], "food": 53, "for": [6, 10, 13, 14, 15, 28, 33, 37, 43, 48, 49, 52, 53, 56, 60, 61, 67], "forc": [43, 53], "force_torchrun": 15, "foreach": 43, "foreach_ema": 43, "fork": 6, "form": 52, "formal_log": 25, "format": [13, 14, 39, 43], "formul": 43, "forward": [10, 39, 43], "found": [37, 43], "fox": 49, "fp16": [3, 10, 13, 19, 43], "fp16_enabl": [10, 43], "fp16_master_weights_and_grad": [10, 43], "fp16_va": [13, 67], "fp32": [10, 43], "fractal_nz": 38, "framework": [33, 49], "franc": 55, "free": 58, "freez": 43, "freq_bas": 13, "freq_base_train": 13, "freq_scal": 13, "freq_scale_train": 13, "frequenc": 31, "frequency_penalti": 13, "friend": [53, 58], "frog": [10, 43], "from": [2, 3, 6, 10, 13, 14, 22, 28, 31, 36, 38, 39, 43, 52, 53, 55, 56, 57, 58, 67], "from_config": 43, "from_pretrain": [2, 3, 43, 53, 56, 58], "from_tf": 43, "from_train": 43, "front": 53, "frozen": 43, "ft22k": 55, "ftype": [13, 67], "full": [33, 43], "full_eval_dataset": 53, "full_train_dataset": 53, "fullyshardeddataparallel": 39, "func": 15, "funcerrorreason": 15, "function": [10, 38, 39, 43, 49], "function_cal": 14, "fungi": 52, "fungus": 52, "fused_adam": 9, "fuser": 15, "gamma": 43, "gate": [10, 43], "gather": 43, "gaussnois": 37, "gb": 9, "gb0": 67, "gcc": [7, 36, 42], "geastracea": 52, "gelu": [28, 38], "gemmaforcausallm": 15, "gen": 34, "gen_kwarg": [24, 25], "general": [9, 13], "generat": [3, 6, 13, 15, 34, 43, 45, 46, 49, 55, 58], "generated_predict": 14, "generated_text": 55, "genus": 52, "get": [6, 7, 10, 14, 15, 37, 43, 53, 55], "get_acceler": [10, 43], "get_ds_config": [10, 43], "get_input": 28, "get_input_embed": 43, "get_last_checkpoint": 43, "get_last_lr": 43, "get_logg": 43, "get_output": 28, "get_process_log_level": 43, "get_rank": [10, 43], "get_schedul": 43, "get_token": 31, "get_veloc": 43, "getattr": 43, "getdeverrmsg": 15, "getlogg": 43, "gflop": 31, "gfortran": 7, "ggerganov": [12, 66, 67], "ggml": [13, 67], "ggml_cann": 66, "gguf": [13, 67], "gib": 13, "gigant": 53, "git": [6, 12, 17, 21, 22, 24, 36, 43, 45, 49, 60, 61, 66], "git_lfs_skip_smudg": 57, "gitcd": 45, "github": [6, 9, 12, 15, 17, 21, 22, 24, 36, 43, 45, 49, 57, 60, 61, 66], "githubusercont": 22, "give": 53, "given": [10, 43], "glibc2": 17, "global_fact": 25, "global_grad_norm": 38, "global_step": [43, 53], "gnu": 13, "go": 53, "going": 43, "good": 58, "got": [6, 39], "govern": 43, "gpt": 43, "gpt2": [13, 43, 55], "gpu": [13, 22, 34, 43, 67], "gpu_devic": 67, "gpus": [0, 34, 43], "grad": 38, "grad_norm": 61, "gradient": [10, 43], "gradient_accumulation_step": [19, 43], "gradient_checkpoint": 43, "gradient_clip": [10, 43], "gradio_server_nam": 14, "gradio_server_port": 14, "gradio_shar": 14, "gradscal": 31, "grammar": 12, "graph": [13, 15], "greater": 43, "grep": [7, 18], "ground": 43, "group": [10, 25, 38, 39, 43], "group_text": 43, "groupadd": 7, "grow": 52, "gsm8k": 34, "gt": 38, "gtbox": 38, "guarante": 43, "guidanc": 58, "guidance_scal": 3, "gyromitra": 52, "h1543": 17, "had": 53, "half": [10, 43, 45], "hand": 53, "handl": [10, 43], "handler": 43, "hang": 61, "happi": 58, "har": [0, 25], "harn": 24, "has": [6, 15, 37, 43, 53], "hasattr": 43, "hasher": 43, "hat": 46, "have": [10, 33, 43, 53, 55, 57, 58], "hbm": 7, "hccl": [15, 31, 39], "hccp": 15, "head_count": 13, 
"head_count_kv": 13, "health": 7, "height": 46, "helen": 58, "hello": 14, "help": [10, 14, 24, 37, 43, 58], "helper": 14, "here": [43, 58], "heun": 46, "hey": 55, "hf": [6, 24, 25, 34, 54], "hf_cach": 17, "hf_endpoint": [6, 25, 57], "hf_hub_download": 57, "hf_hub_log_arg": 24, "hf_internlm2_1_8b": 34, "hf_internlm2_chat_1_8b": 34, "hf_model": 22, "hf_qwen2_1_5b": 34, "hf_qwen2_1_5b_instruct": 34, "hfargumentpars": 43, "hidden_s": [10, 43], "high": 53, "high_perform": 28, "high_school_biolog": 25, "high_school_chemistri": 25, "high_school_computer_sci": 25, "high_school_european_histori": 25, "high_school_geographi": 25, "high_school_government_and_polit": 25, "high_school_macroeconom": 25, "high_school_mathemat": 25, "high_school_microeconom": 25, "high_school_phys": 25, "high_school_psycholog": 25, "high_school_statist": 25, "high_school_us_histori": 25, "high_school_world_histori": 25, "higher": [43, 61], "highest": 43, "his": 53, "hisi_hdc": [5, 12, 17], "hiyouga": [15, 17], "hmmm": 43, "home": [7, 13, 58, 61], "hook": 43, "horizont": 43, "hors": [10, 43], "host": [5, 6, 13], "household": 58, "housekeep": 58, "how": [22, 43], "hpp": 37, "hsdp": 39, "html": [37, 43], "http": [14, 37, 43, 55], "https": [2, 5, 6, 7, 12, 14, 15, 17, 18, 19, 21, 22, 24, 25, 30, 33, 36, 43, 45, 48, 49, 51, 55, 56, 57, 60, 61, 66, 67], "httpsconnectionpool": 6, "huawei": 14, "hub": [43, 52, 57], "hub_model_id": 43, "hub_token": 43, "hub_util": 43, "hug": [13, 52], "hugepag": 7, "huggingfac": [6, 15, 17, 34, 43, 54, 55, 60, 61, 67], "huggingface_hub": [43, 57], "huggingfaceh4": 55, "human": [14, 25], "human_ag": 25, "human_sexu": 25, "hwcn": 38, "hwhiaiuser": 7, "hwhiaiusersudo": 7, "hyperparamet": 43, "hysteresi": [10, 43], "i32": 13, "icl_gen_inferenc": 34, "icl_inferenc": 34, "id": [7, 34, 43, 57, 58], "ident": [14, 19], "identifi": 43, "if": [6, 9, 10, 14, 18, 22, 27, 28, 31, 37, 39, 43, 53, 56, 57, 58, 61], "ignor": [43, 53], "ignore_pattern": 43, "ih": 38, "ill": [52, 53], "ilsvrc2012_val_00005844": 52, "im_end": 34, "im_start": 34, "imag": [3, 10, 13, 22, 28, 31, 37, 43, 51, 55], "image_column": 43, "image_dataset": 43, "image_dummi": 55, "image_featur": 31, "image_gflop": 31, "image_grid": 43, "image_load": 43, "image_mparam": 31, "image_path": 28, "image_s": 31, "image_width": 31, "imagefold": 43, "imagenet": [31, 52], "imagenet_class": 28, "imagepath": 37, "imagetoimagepipelin": 55, "img": [28, 37, 43, 55], "img2img": 46, "img_siz": 52, "img_str": 43, "imgcodec": 37, "impira": 55, "implement": [43, 61], "impli": 43, "import": [2, 3, 10, 14, 21, 22, 28, 30, 31, 33, 37, 38, 39, 42, 43, 51, 52, 53, 55, 56, 57, 58, 63], "import_util": 43, "importerror": 43, "imread": 37, "imssokn1ijbaraz0ilawdfsutbvdjmz9j": 57, "imwrit": 37, "in": [9, 10, 12, 13, 14, 15, 28, 37, 43, 52, 53, 55, 58, 61], "in_channel": 38, "in_height": 38, "in_width": 38, "inc": 43, "includ": [10, 13, 14, 36, 37, 43, 53, 57], "include_path": 24, "inconsist": 43, "index": [38, 43, 45], "indic": [10, 43], "inf": 43, "infer": [3, 6, 19, 28, 34, 43, 52, 56, 61], "inferencemod": 61, "inferencesess": 28, "info": [5, 7, 9, 12, 14, 17, 18, 24, 34, 43], "inform": [10, 43, 58], "init": [43, 45], "init_distribut": [10, 43], "init_process_group": 39, "init_track": 43, "initacl": 37, "initi": [6, 10, 15, 39, 43, 56], "initial_global_step": 43, "initial_scale_pow": [10, 43], "input": [10, 31, 37, 38, 43, 49, 61], "input_dir": 43, "input_id": [43, 53, 58], "input_nam": 28, "input_perturb": 43, "input_s": 52, "instal": [2, 5, 6, 7, 9, 10, 13, 14, 15, 17, 
18, 19, 21, 24, 27, 28, 30, 33, 36, 43, 45, 48, 51, 53, 56, 57, 60, 63], "install_deepspe": 17, "instanc": 43, "instantan": 43, "instanti": 43, "instead": [43, 53, 55, 61], "instruct": [13, 25, 34, 53, 55, 57, 58], "int": [10, 37, 38, 43], "int16": 38, "int32": 38, "int64": 38, "int8": 38, "integr": [12, 43], "intellig": 14, "intend": 14, "interest": [10, 38, 43], "intern": [15, 61], "international_law": 25, "internlm": [21, 22, 34], "internlm2": [22, 34], "internlm2_5": 22, "internvl2": 22, "interpol": [43, 52], "interpolationmod": 43, "interv": [10, 43], "into": [43, 52], "intric": [3, 52], "introduc": [22, 58], "inttensor": 38, "invalid": 15, "invoic": 55, "iof": 38, "iostream": 37, "iou": 38, "iou_threshold": 38, "iphon": 55, "iron": 58, "is": [6, 10, 14, 15, 22, 37, 43, 53, 55, 56, 58, 61], "is_avail": [31, 43, 53, 56, 58], "is_compiled_modul": 43, "is_cross": 38, "is_initi": 43, "is_local_main_process": 43, "is_main_process": 43, "is_torch_xla_avail": 43, "is_train": 43, "is_wandb_avail": 43, "is_xformers_avail": 43, "isdir": 43, "isinst": 43, "issu": 15, "it": [6, 10, 12, 13, 14, 17, 22, 27, 37, 38, 43, 53, 55, 56, 61], "itd": 5, "item": [10, 38, 43], "iter": [10, 43], "itertool": 43, "its": [43, 55], "iw": 38, "j5": 36, "jax": 0, "jfk": 67, "jiahao": 13, "jieba": 14, "jit": [9, 15], "jitter": [10, 43], "join": [39, 43], "jpeg": [22, 52], "jpg": [28, 31, 55], "json": [6, 12, 14, 19, 24, 43, 57, 64], "json_fil": 43, "jsonl": [14, 43], "jump": 49, "jurisprud": 25, "just": [6, 9, 43, 57], "k15qrjlykifslz": 57, "k2hdnftkt9kladg8hhfqmgwuhdtjsvcezjirkwptzrf0dohepoiohocqw": 57, "keep": 43, "keep_linebreak": 43, "keepdim": 31, "kernel": [2, 7, 12, 17, 22, 24, 30, 33, 36, 48, 51, 60, 61, 63, 66], "kernel_s": 38, "key": [13, 43, 57, 64], "kh": 38, "kid": 53, "kill": 15, "kind": 43, "knextpoweroftwo": 28, "know": 43, "kt": 38, "ktyc": 57, "kv": [13, 67], "kw": 38, "kwarg": [34, 43], "l6": 49, "label": [10, 14, 28, 31, 39, 43, 52, 53, 55, 56, 61], "label_pad_token_id": 61, "lambda": [10, 43], "lambdalab": 43, "lampoon": 55, "lang": [14, 67], "languag": [13, 14, 19, 43], "lapack": 7, "larg": [13, 19, 43, 57, 67], "larger": 43, "last": [15, 43], "last_checkpoint": 43, "latent": 43, "latent_dist": 43, "latest": [13, 17, 22, 24, 36, 43, 45, 63], "launch": 43, "launcher": [10, 15, 43], "laundri": 58, "law": 43, "layer": [10, 13, 43], "layer_norm_rms_epsilon": 13, "layout": [13, 38], "lazi": 49, "learn": [0, 6, 14, 43, 53, 60, 61], "learning_r": [3, 19, 43, 61], "learnt": 43, "least": [39, 43], "leav": 43, "left": [34, 43], "len": [34, 43], "len_train_dataloader_after_shard": 43, "length": [13, 43, 53], "let": [10, 43, 55], "level": [37, 43], "levelnam": 43, "lf": 13, "lib": [9, 15, 36], "lib64": 12, "libascend_h": 22, "libbla": 7, "libblas3": 7, "libffi": 7, "librari": 43, "libsqlite3": 7, "libssl": 7, "libtorch": 64, "licenc": 57, "licens": [37, 43, 57], "life": 58, "light": 3, "like": [43, 52], "limit": [24, 25, 43], "line": [15, 28, 43], "linear": [10, 13, 39, 43], "link": [12, 66], "linux": [7, 13, 17, 45], "list": [10, 43, 49, 64], "listdir": 43, "listen": 45, "listint": 38, "lite": 22, "live": 55, "ll": 43, "llama": [0, 13, 14, 15, 18, 19, 53, 55, 57, 58], "llama2": 22, "llama3": [13, 22, 57], "llama_kv_cache_init": 13, "llama_model_load": 13, "llama_new_context_with_model": 13, "llama_print_tim": 13, "llamacpp": 12, "llamafactori": [14, 15, 17, 19], "llamafil": 13, "llamameta": 57, "llm": [0, 13, 19, 20, 43], "llm_load_print_meta": 13, "llm_load_tensor": 13, "llm_load_vocab": 13, "llms": 21, 
"lm": [0, 21, 25], "lm_dataset": 43, "lm_deploy": 20, "lm_eval": 25, "lmdeploy": [0, 21, 22, 33], "lmdeploy_ascend_demo": 22, "lms": 46, "load": [3, 10, 13, 43, 49, 52, 53, 67], "load_dataset": [43, 53], "load_from_cache_fil": 43, "load_imag": 22, "load_lora_weight": 3, "load_model": 43, "load_model_hook": 43, "load_openai_model": 31, "load_or_create_model_card": 43, "load_stat": 43, "load_state_dict": 43, "loader": [10, 43], "loading_dataset": 43, "local": [5, 7, 10, 12, 15, 17, 22, 27, 39, 43, 57, 64], "local_devic": [10, 43], "local_rank": [10, 43], "localhost": [14, 39], "locat": 53, "lock": 38, "log": [10, 13, 31, 34, 43, 53], "log_interv": [10, 43], "log_level": 43, "log_metr": 43, "log_sampl": 24, "log_valid": 43, "log_with": 43, "logger": 43, "logging_dir": 43, "logging_step": 19, "logical_fallaci": 25, "login": 43, "logit": [43, 53, 61], "loglevel": 67, "logp": 61, "long": 43, "longer": 43, "look": [43, 61], "loop": [10, 43], "loop_messag": 13, "lora": [16, 34, 43], "lora_model_path": 3, "lora_target": 19, "loss": [10, 19, 39, 43, 52, 53, 61], "loss_fn": 39, "loss_scal": [10, 43], "loss_scale_window": [10, 43], "low": 53, "low_cpu_mem_usag": 43, "lowr": 46, "lr": [10, 31, 38, 39, 43, 52], "lr_schedul": [3, 43], "lr_scheduler_typ": 19, "lr_warmup_step": [3, 43], "lspci": 7, "lzkpxqmyy0xqw047tnn9iwx": 57, "m_in": 38, "m_out": 38, "macaw": 55, "machin": 43, "machine_learn": 25, "madebyollin": 3, "magic": 3, "magnitud": 43, "main": [6, 10, 13, 22, 24, 31, 37, 39, 43, 55, 60, 66, 67], "main_process_first": 43, "main_process_on": 43, "maintain": 43, "mainten": 58, "make": [7, 36, 43, 53, 57, 58, 66], "make_image_grid": 43, "makedir": 43, "manag": [13, 25, 33, 53], "manual_se": 43, "map": [6, 43, 53], "margin": 61, "mark": 3, "market": 25, "mask": 38, "master_addr": [15, 18, 19, 39], "master_port": [15, 18, 19, 39], "mat": 37, "mat1": 38, "mat2": 38, "match": 43, "materi": 43, "math": [34, 43], "matmul": [38, 43], "matmul_int8": 13, "max": [10, 13, 34, 43], "max_batch_s": 24, "max_eval_sampl": 43, "max_grad_norm": [38, 43], "max_len": 43, "max_length": [43, 53], "max_new_token": [55, 58], "max_pos_embed": 43, "max_position_embed": 43, "max_sampl": [14, 19], "max_shap": 38, "max_size_per_class": 38, "max_total_s": 38, "max_train_sampl": 43, "max_train_step": 43, "maximum": [43, 53], "maxpool2d": [10, 43], "may": [43, 58], "mayb": 15, "mb": [7, 13, 67], "mbert_adam": 38, "mcdonald": 53, "md": 43, "me": [53, 58], "meal": 53, "mean": [9, 28, 38, 43, 52, 55], "means0": 38, "means1": 38, "means2": 38, "means3": 38, "means33": 38, "meddl": 53, "media": 13, "medical_genet": 25, "medium": 67, "meet": 9, "megr": 14, "mel": 67, "memori": [7, 9, 43], "memory_format": 43, "merg": 13, "messag": [13, 14, 24, 43, 58], "meta": [0, 13, 53, 54, 55, 58], "metadata": [13, 43], "metal": 67, "method": [6, 19, 43, 46], "metric": [15, 17, 25, 34, 43, 53], "mg": 13, "mib": 13, "microsoft": 55, "might": [10, 43], "min": [10, 34, 43], "min_capac": [10, 43], "min_loss_scal": [10, 43], "min_p": 13, "mine": 53, "mini": [10, 43], "minibatch": 38, "miniconda": 44, "miniconda3": [9, 36, 45], "minilm": 49, "minim": 43, "minimum": [10, 43, 61, 63], "minut": 53, "mirostat": 13, "mirostat_": 13, "mirostat_lr": 13, "mirror": [6, 25, 54], "miscellan": 25, "miss": 43, "mistak": 53, "mix": [10, 43], "mixed_precis": [3, 43], "mixtur": [10, 43], "mkdir": [36, 45, 66], "ml": 0, "mlfoundat": 45, "mlk": 55, "mlp": [10, 43], "mlp_type": [10, 43], "mm": 42, "mmdeploy": 22, "mmlab": 22, "mmlu": [14, 25], "mode": [13, 22, 34, 38, 43], 
"model": [3, 6, 10, 12, 13, 14, 19, 24, 25, 28, 31, 33, 34, 39, 43, 45, 51, 52, 53, 55, 56, 58, 67], "model_arg": [24, 25, 43], "model_best": 52, "model_card": 43, "model_ckpt": 52, "model_cl": 43, "model_config": 43, "model_config_class": 43, "model_descript": 43, "model_engin": [10, 43], "model_for_causal_lm_map": 43, "model_id": [53, 56, 58], "model_kwarg": 58, "model_max_length": 43, "model_nam": [3, 31, 43], "model_name_or_path": [14, 19, 43], "model_paramet": [10, 43], "model_path": [3, 12, 28], "model_pr": 43, "model_revis": 43, "model_typ": 43, "modelargu": 43, "modeling_chatglm": 15, "models_ckpt": 3, "modelscop": [15, 17, 18, 19], "modelsvit": 31, "modifi": 43, "modul": [10, 36, 38, 39, 43], "modulelist": [10, 43], "moe": [10, 43], "moe_layer_list": [10, 43], "moe_param_group": [10, 43], "momentum": 43, "mood": 53, "moral_disput": 25, "moral_scenario": 25, "more": [33, 43, 53, 61], "most": [15, 43], "move": 43, "mp": 39, "mparam": 31, "mps": 43, "ms": [13, 36, 67], "ms_cach": 17, "ms_coco_2017_url_text": 31, "mse_loss": 43, "mse_loss_weight": 43, "mseloss": 39, "msg": 15, "much": 43, "mulkernelnpuopapi": 15, "multi": [31, 34], "multipl": [10, 43], "multiprocess": [39, 43], "mushroom": 52, "must": [15, 43], "mv": 67, "my": [13, 53, 55, 58, 67], "my_imag": 43, "myself": [53, 58], "n1": 14, "n2": 14, "n3": 14, "n_audio_ctx": 67, "n_audio_head": 67, "n_audio_lay": 67, "n_audio_st": 67, "n_batch": 13, "n_ctx": 13, "n_ctx_orig_yarn": 13, "n_ctx_train": 13, "n_e": [10, 43], "n_embd": [13, 43], "n_embd_head_k": 13, "n_embd_head_v": 13, "n_embd_k_gqa": 13, "n_embd_v_gqa": 13, "n_expert": 13, "n_expert_us": 13, "n_ff": 13, "n_gpu": 43, "n_gqa": 13, "n_head": 13, "n_head_kv": 13, "n_keep": 13, "n_lang": 67, "n_layer": 13, "n_mel": 67, "n_merg": 13, "n_npus": 39, "n_param": 43, "n_predict": 13, "n_rot": 13, "n_shot": 14, "n_swa": 13, "n_text_ctx": 67, "n_text_head": 67, "n_text_lay": 67, "n_text_stat": 67, "n_thread": [13, 67], "n_ubatch": 13, "n_vocab": [13, 67], "name": [5, 7, 9, 10, 12, 13, 14, 17, 22, 24, 28, 43, 58], "narg": [10, 43], "narsil": 55, "naruto": 43, "nassist": 14, "nation": 55, "native_amp": 43, "natur": 14, "nd": 38, "ndarray": 43, "near": 53, "necessari": 43, "need": [9, 10, 27, 43, 53, 55], "negat": 46, "neither": 53, "neon": [13, 67], "nesterov": 38, "net": [7, 10, 43, 57], "net1": 39, "net2": 39, "network": [5, 10, 43], "neural": [10, 14, 43], "never": 53, "new": 43, "new_nois": 43, "newli": [6, 56], "next": 55, "next_token": 15, "nf": 18, "ngl": [12, 13], "ngrok": 45, "nhwc": [38, 43], "ni": 53, "nice": 43, "ninja": 9, "nlp": 6, "nlp_exampl": 6, "nlp_pipelin": 56, "nltk": 14, "nms": 38, "nmsed_box": 38, "nmsed_class": 38, "nmsed_num": 38, "nmsed_scor": 38, "nn": [10, 39, 43], "nnal": 22, "nnal_": 22, "nnode": [15, 18, 19], "no": [3, 9, 10, 15, 43, 45, 53], "no_grad": [10, 31, 43], "node": 13, "node_rank": [15, 18, 19], "nois": 43, "noise_offset": 43, "noise_schedul": 43, "noisi": [10, 43], "noisy_gate_polici": [10, 43], "noisy_lat": 43, "non": 43, "non_block": [38, 43], "non_ema": 43, "non_ema_revis": 43, "none": [10, 13, 14, 25, 38, 43], "nonmaxsuppress": 38, "norm": [31, 43], "normal": [10, 37, 38, 43], "not": [6, 9, 13, 15, 43, 52, 53, 55, 56, 61, 67], "note": [6, 9, 10, 13, 43, 61], "notimplementederror": 15, "now": 43, "np": [28, 37, 43, 53], "np_imag": 43, "nproc": 39, "nproc_per_nod": [15, 18, 19, 31], "npu": [2, 3, 5, 6, 7, 10, 12, 13, 14, 15, 16, 17, 19, 22, 25, 27, 28, 29, 32, 34, 37, 38, 39, 42, 43, 49, 50, 53, 56, 58, 61, 63, 64, 67], 
"npu_alloc_float_status": [38, 41], "npu_anchor_response_flag": [38, 41], "npu_apply_adam": [38, 41], "npu_batch_nm": [38, 41], "npu_bert_apply_adam": [38, 41], "npu_bmmv2": [38, 41], "npu_bounding_box_decod": [38, 41], "npu_broadcast": [38, 41], "npu_ciou": [38, 41], "npu_clear_float_status": [38, 41], "npu_confusion_transpos": [38, 41], "npu_conv2d": [38, 41], "npu_conv3d": [38, 41], "npu_conv_transpose2d": [38, 41], "npu_convolut": [38, 41], "npu_convolution_transpos": [38, 41], "npu_deformable_conv2d": [38, 41], "npu_list": 18, "npu_mem_limit": 28, "npus": 39, "nthe": 53, "null": [18, 42], "nullcontext": 43, "num": [10, 34, 43], "num_anchor": 38, "num_base_anchor": 38, "num_class": 38, "num_expert": [10, 43], "num_fewshot": [24, 25], "num_gpus": 18, "num_inference_step": [3, 43], "num_npus": [18, 52], "num_proc": 43, "num_process": 43, "num_to_remov": 43, "num_train_epoch": [3, 19, 43], "num_train_timestep": 43, "num_training_step": 43, "num_training_steps_for_schedul": 43, "num_update_steps_per_epoch": 43, "num_warmup_step": 43, "num_warmup_steps_for_schedul": 43, "num_work": [10, 43], "number": [10, 43, 55, 61], "numel": 43, "numer": 52, "numpi": [7, 28, 30, 33, 37, 43, 51, 53], "nuser": 14, "nutrit": 25, "nvidia": 43, "nyou": 14, "obiect": 15, "object": [14, 22], "observ": 43, "obtain": 43, "occasion": 53, "octan": 3, "of": [6, 10, 14, 24, 37, 38, 43, 49, 52, 53, 55, 56, 61, 67], "off": [36, 53], "offload": 43, "offload_ema": 43, "offset": [38, 43], "often": 52, "ogg": 67, "ok": 7, "okay": 9, "on": [6, 10, 12, 13, 14, 31, 34, 36, 43, 53, 55, 56, 66], "onc": 43, "one": [43, 53, 55], "ones_lik": 43, "onli": [10, 43, 61], "onnx": [0, 28], "onnxruntim": [27, 28], "op": [9, 15, 38], "op_plugin": 15, "op_select_impl_mod": 28, "opapi": 15, "open": [22, 28, 30, 31, 53, 55], "open_cliop": 30, "open_clip": [31, 33, 45, 51, 52, 63], "open_clip_pytorch_model": 31, "openai": [14, 31, 33, 43, 45, 67], "openclip": 30, "opencompass": 0, "opencv": [0, 37], "opencv2": 37, "opencv_contrib": 36, "opencv_extra_modules_path": 36, "opencv_test_cannop": 36, "openeul": 7, "openeuler20": 7, "opengvlab": 22, "openicl": 34, "openrail": 43, "openslr": 64, "openssl": 7, "openvino": 67, "oper": [49, 61], "ops": [9, 15], "optim": [10, 39, 43], "optimizer_cl": 43, "option": [24, 28, 43, 55], "optypelist_for_implmod": 28, "or": [10, 14, 43, 58, 61], "order": [13, 43, 52, 53], "org": [17, 37, 43, 45, 55], "organ": 58, "origin": [43, 57], "ort": 28, "os": [10, 14, 39, 43], "other": [10, 13, 14, 25, 33, 43, 55, 58], "our": [10, 43], "out": [6, 34, 38, 43], "out_channel": 38, "out_height": 38, "out_width": 38, "outer": 52, "output": [10, 13, 17, 19, 34, 37, 38, 39, 43, 55, 58], "output_dir": [3, 14, 19, 43, 53], "output_nam": 28, "output_pad": 38, "output_path": 24, "output_s": 38, "outputpath": 37, "over": [10, 43, 49, 53], "overcom": 43, "overflowerror": 43, "overlap_comm": [10, 43], "overrid": [13, 43], "overwrit": 43, "overwrite_cach": [14, 19, 43], "overwrite_output_dir": [14, 19, 43], "own": 43, "packag": [7, 9, 15, 43], "package_refer": 43, "pad": [6, 34, 38, 43, 53, 67], "padding_sid": 34, "page": [7, 43], "pair": [13, 57], "panda": [31, 43], "paper": 43, "parallel": [6, 10, 27, 39, 43], "parallel_mod": 43, "param": [10, 13, 38, 43, 52], "param_count": 52, "paramet": [10, 15, 39, 43], "pari": 55, "parodist": 55, "parrot": 55, "pars": 43, "parse_arg": [10, 37, 43], "parse_args_into_dataclass": 43, "parse_json_fil": 43, "parser": [10, 37, 43], "part": [37, 43], "parti": 53, "partial": 43, "particular": 43, 
"partit": 43, "pass": [10, 36, 39, 43, 49, 55], "passiv": 43, "past_key_valu": 43, "patch16": 55, "path": [3, 9, 13, 14, 27, 31, 34, 36, 37, 43, 52, 57, 67], "path_to_model": 13, "path_to_train_fil": 43, "path_to_validation_fil": 43, "path_to_your_model": 12, "pathlib": 43, "pathlib2": [7, 30, 33, 51], "pciutil": 7, "pcm_s16le": 67, "pd": 31, "pde": 46, "peft": [17, 34], "penalti": [13, 38], "peopl": 53, "per": [10, 13, 43, 67], "per_device_eval_batch_s": [14, 19, 43], "per_device_train_batch_s": [19, 43], "percentag": 43, "perform": [14, 43], "perhap": 53, "peridium": 52, "period": 43, "perm": 38, "permiss": 43, "perplex": [12, 43], "person": 53, "pertain": 43, "perturb": 43, "phallal": 52, "philosophi": 25, "phone": 55, "photo": 3, "pick": 43, "pickl": 43, "pid": 15, "piec": 13, "pil": [28, 31, 55], "pilimag": [10, 43], "pillow": 28, "pin_memori": [38, 43], "pip": [1, 5, 6, 9, 10, 14, 15, 17, 18, 19, 24, 28, 30, 33, 42, 43, 45, 48, 51, 53, 56, 57, 60, 63], "pip3": [7, 27, 30, 33, 51, 56], "pip_index": 17, "pipe": [3, 22, 55], "pipelin": [2, 3, 22, 43, 54, 56], "pixel": 52, "pixel_valu": 43, "place": 53, "plane": [10, 43], "platform": [17, 43], "pleas": [6, 15, 22, 43, 58, 61], "plot_loss": 19, "plugin": 15, "png": [3, 37, 43, 55], "point": [43, 61], "pointer": [43, 57], "pokemon": [3, 43], "polici": [10, 43, 57], "polit": 58, "poll": 55, "poll_parrot": 55, "polynomi": 43, "pool": [10, 13, 43], "poor": 53, "pop": 43, "popinjay": 55, "populate_model_card": 43, "port": [6, 14], "posit": 56, "possibl": [6, 43], "post1": 45, "post6": [48, 60], "potenti": 43, "power": 7, "ppo": 0, "pr": [15, 43], "pre": 13, "precis": [10, 43, 53], "pred": 43, "pred_idx": 28, "predefin": 53, "predict": [6, 10, 14, 28, 34, 43, 52, 53, 56], "predict_bleu": 14, "predict_on": 24, "predict_result": 14, "predict_roug": 14, "predict_runtim": 14, "predict_samples_per_second": 14, "predict_steps_per_second": 14, "predict_with_gener": 14, "prediction_typ": 43, "prefer": 45, "prehistori": 25, "prepar": [10, 13, 43], "preprocess": [28, 31, 43], "preprocess_logits_for_metr": 43, "preprocess_train": 43, "preprocessing_num_work": [14, 19, 43], "prescale_gradi": [10, 43], "presence_penalti": 13, "present": 43, "preserv": 43, "press": 14, "pretrain": [25, 31, 43, 49, 52], "pretrained_model_name_or_path": [3, 43], "pretrained_vae_model_name_or_path": 3, "pretrainedmodel": 55, "pretrainedtoken": 55, "previous": 43, "primari": 58, "print": [10, 14, 18, 21, 22, 28, 30, 31, 33, 42, 43, 51, 53, 55, 56, 58, 63], "printmessag": 37, "privat": 43, "privateuse1": 15, "pro": 7, "prob": [31, 38, 52], "probabl": [6, 56], "problem": [43, 55], "proceed": [10, 43], "process": [6, 7, 14, 15, 34, 37, 39, 43, 52, 61, 67], "process_mode_manag": 15, "processgrouphccl": 15, "processing_class": 43, "processor": 67, "produc": 61, "professional_account": 25, "professional_law": 25, "professional_medicin": 25, "professional_psycholog": 25, "profil": 31, "profile_model": 31, "profiler_result": 31, "progress": 43, "progress_bar": 43, "project": 37, "project_config": 43, "project_dir": 43, "project_nam": 43, "projectconfigur": 43, "prompt": [3, 13, 14, 19, 43, 46, 53, 67], "proper": 43, "propos": 38, "protobuf": [7, 30, 33, 51], "provid": [28, 43, 53, 58], "prune": 46, "psutil": [7, 30, 33, 51], "pt": [31, 43, 58, 67], "pt22k": 55, "pta": 15, "pth": 52, "public": 43, "public_rel": 25, "puffbal": 52, "pull": [15, 43], "puppi": 37, "purpl": 3, "purpos": [13, 43], "push": 43, "push_to_hub": 43, "put": 31, "pwd": 36, "py": [3, 6, 7, 13, 14, 15, 18, 19, 30, 31, 
33, 43, 49, 51, 52, 61, 64, 67], "py3": 15, "pyenv": 15, "pypi": [2, 5, 6, 7, 14, 17, 18, 19, 30, 33, 48, 51, 56, 60], "python": [1, 6, 7, 13, 15, 17, 21, 27, 29, 31, 32, 34, 42, 43, 45, 49, 50, 52, 56, 61, 62, 67], "python3": [3, 7, 9, 15, 21, 31, 36], "python310": 45, "python_include_dir": 36, "pytorch": [0, 9, 15, 17, 19, 21, 22, 31, 38, 39, 43, 45, 48, 49, 51, 58, 60], "pytorch_model": 52, "pytorch_npu_alloc_conf": 49, "pytorchengin": 22, "pytorchengineconfig": 22, "pyyaml": [7, 30, 33, 51], "q4_0": 13, "q5_0": 67, "q5_1": 67, "q8_0": 13, "q_proj": 19, "qntvr": 67, "quantiz": [13, 66], "quantization_vers": 13, "queri": 58, "question": [22, 55, 58], "quick": [49, 53], "quicker": 43, "quickstart": 14, "quit": 14, "qwen": [14, 19, 25, 33, 34, 57], "qwen1": [14, 19], "qwen1_5_lora_sft_d": [14, 18, 19], "qwen2": [13, 22, 25, 34, 57], "racial": 53, "rais": 43, "ram": 43, "ran": 36, "rand": 38, "randint": 43, "randn": [38, 39, 42, 43], "randn_lik": 43, "random": [37, 43], "random_flip": [3, 43], "randomcrop": 43, "randomhorizontalflip": 43, "rang": [10, 14, 43, 53], "rank": [10, 15, 39, 43], "rankid": 15, "rare": 53, "rate": 43, "raw": [22, 55], "raw_dataset": 43, "rc1": [2, 5, 12, 17, 24, 30, 33, 36, 48, 51, 60, 63, 66], "rc2": [9, 17, 22, 63], "re": [6, 43], "reach": 3, "read": [6, 43], "readlin": 28, "readm": [6, 43], "realist": 3, "reason": 15, "rebalanc": 43, "recalcul": 43, "receipt": 53, "recent": [15, 43], "recognit": [14, 55, 64, 67], "recommend": [43, 61, 63], "reduce_bucket_s": [10, 43], "reduce_scatt": [10, 43], "reduct": 43, "refer": [43, 53], "refresh": 24, "reg": 15, "regardless": [10, 43], "region": 38, "regist": [13, 53], "register_load_state_pre_hook": 43, "register_save_state_pre_hook": 43, "register_to_config": 43, "registrar": 13, "reinforc": [0, 60, 61], "reject": 61, "relat": [10, 43, 58], "releas": [7, 12, 24, 27, 36, 61], "relu": [10, 39, 43], "relwithdebinfo": 27, "remain": 53, "remaind": 43, "remod": 52, "remot": 43, "remov": [43, 61], "remove_column": 43, "removing_checkpoint": 43, "render": 3, "repeat": 43, "repeat_last_n": 13, "repeat_penalti": 13, "repo": [13, 15, 45, 67], "repo_fold": 43, "repo_id": [43, 57], "repo_id_or_path": 43, "report": [9, 15, 31, 43], "report_to": 43, "repositori": [43, 45], "reprob": 52, "reproduc": 43, "request": [7, 30, 33, 51, 55, 57, 58], "requir": [9, 10, 39, 43, 45, 63], "require_vers": [14, 43], "requirements_npu": 45, "requires_grad": [10, 38, 43], "requires_grad_": 43, "res": 38, "reserv": 43, "resetdevic": 37, "reshap": [28, 38, 43], "resid_pdrop": 43, "residu": [10, 43], "resiz": [28, 43, 55], "resize_token_embed": 43, "resnet": 28, "resnet50": 28, "resolut": [3, 43], "resolv": [6, 55], "resourc": 43, "respond": 58, "respons": [13, 15, 22, 58], "restaur": 53, "restor": 43, "result": [14, 15, 28, 31, 34, 43, 52, 56, 61], "resum": [6, 43], "resume_from_checkpoint": 43, "retcod": 15, "return": [10, 28, 37, 39, 43, 53], "return_dict": 43, "return_tensor": [43, 58], "review": 53, "revis": 43, "reward": 61, "rf": 45, "rgb": 43, "right": 43, "rise": 55, "risk": [43, 61], "rm": [22, 45], "rmtree": 43, "rng": 37, "roi": 38, "role": [14, 55, 58], "root": [7, 9, 10, 17, 43], "rope": 13, "rope_finetun": 13, "rotat": 37, "roug": 14, "round": [43, 55], "row": 37, "rpn": 38, "rsampl": [10, 43], "rtgetdevmsg": 15, "rude": 53, "run": [5, 10, 12, 13, 14, 15, 17, 22, 28, 39, 43, 52, 61, 67], "run_clm": 43, "run_npu": 64, "running_loss": [10, 43], "runtim": [0, 9, 10, 22, 28, 43], "runtimeerror": 15, "runwayml": 2, "rzf8xaiyf5": 57, "s0": 64, 
"s0002": 64, "s34b": 31, "safe": 52, "safetensor": [6, 46, 52], "safety_check": 43, "salesforc": 45, "same": 43, "sampl": [13, 37, 43, 46, 67], "sampling_r": 55, "saniti": 43, "save": [3, 14, 15, 19, 31, 43], "save_checkpoint": 15, "save_metr": 43, "save_model": 43, "save_model_card": 43, "save_model_hook": 43, "save_path": 43, "save_pretrain": 43, "save_st": 43, "save_step": 19, "scalar": 38, "scale": [13, 43], "scale_attn_weight": 43, "scale_lr": 43, "scaling_factor": 43, "scene": 43, "sched": 52, "schedul": [10, 43, 58], "schema": 12, "scienc": 14, "scikit": [6, 53], "scipi": [7, 30, 33, 51], "score": [38, 55, 56], "score_threshold": 38, "scp": 64, "scratch": 43, "screen": 13, "script": [6, 7, 43, 61], "sd": [3, 43], "sdxl": 3, "sec": 67, "second": 13, "section": 43, "secur": 43, "security_studi": 25, "see": [43, 61], "seed": [3, 13, 24, 31, 38, 43, 46, 53], "seem": 43, "select": [7, 13, 43, 53], "self": [10, 13, 15, 38, 39, 43, 67], "send": 43, "send_example_telemetri": 43, "sent": 43, "sentenc": [0, 43, 49, 56], "sentencetransform": 49, "sentiment": 56, "separ": [10, 43], "seq": [18, 34], "sequenc": [43, 55], "seresnet34": 52, "serial": 43, "serv": [22, 53], "server": [12, 16, 66], "servic": [13, 53], "sess_opt": 28, "session": 28, "sessionopt": 28, "set": [6, 10, 13, 43], "set_default_tensor_typ": 38, "set_devic": [10, 43], "set_env": [7, 22, 27], "set_postfix": 43, "set_progress_bar_config": 43, "set_se": 43, "set_verbos": 43, "set_verbosity_error": 43, "set_verbosity_info": 43, "set_verbosity_warn": 43, "setdevic": 37, "setlevel": 43, "setup": [15, 39, 43], "sft": [0, 16, 19], "sgd": [39, 43], "sh": [7, 22, 27, 45, 52, 57, 64, 67], "shake": 53, "shall": 43, "shanghai": 22, "shape": [37, 38, 43, 52, 58], "shard": 43, "share": [9, 22], "she": 53, "shell": 43, "shift": 43, "ship": [10, 43], "shm": [5, 9, 17], "shot": 25, "should": [6, 43, 53, 56, 58], "should_log": 43, "show": [24, 37, 43], "show_config": 24, "shuffl": [10, 43, 53], "shutil": 43, "signatur": 57, "silent": 61, "simpl": [2, 5, 6, 7, 12, 13, 14, 17, 18, 19, 30, 33, 43, 48, 51, 56, 60], "simpli": [10, 43], "sinc": [33, 43], "singl": [43, 55], "site": [9, 15], "size": [5, 9, 10, 13, 17, 31, 34, 38, 43, 52, 55, 67], "skip": 45, "skip_special_token": 58, "sklearn": 53, "slight": 43, "slower": 43, "sm": 13, "small": [43, 55, 67], "small_eval_dataset": 53, "small_train_dataset": 53, "smaller": 43, "smartphon": 14, "smell": 52, "smi": [5, 7, 12, 17, 18], "snapshot_download": 57, "snr": 43, "snr_gamma": 43, "so": [10, 22, 43, 53, 55, 67], "social": 14, "social_sci": 25, "sociolog": 25, "softmax": 31, "softwar": 43, "solut": 15, "some": [6, 43, 56, 61], "someon": 53, "someth": [43, 53], "sort": 43, "sourc": [7, 22, 27, 45], "space": [43, 55], "sparkl": 58, "spawn": 39, "special": [13, 43, 53], "specif": [14, 43, 58], "specifi": [19, 43], "spectacular": 53, "specul": 12, "speech": [14, 55, 64, 67], "speed": 43, "split": [6, 13, 14, 43, 52], "split_params_into_different_moe_groups_for_optim": [10, 43], "sq": 15, "sqid": 15, "sqlite": 7, "squash": 52, "squeez": [10, 43], "sr": [38, 55], "src": [14, 15, 18, 19, 31, 38, 43], "sse3": [13, 67], "ssm_d_conv": 13, "ssm_d_inner": 13, "ssm_d_state": 13, "ssm_dt_rank": 13, "sss": 61, "ssse3": [13, 67], "stabil": 45, "stabilityai": 3, "stabl": [0, 2, 3, 24, 43, 46], "stablediffus": 45, "stablediffusionpipelin": 43, "stack": 43, "stage": [10, 14, 19, 43, 64], "stand": 55, "standard": [10, 43], "star": 52, "start": [10, 13, 14, 34, 43, 53, 55], "startswith": 43, "startup": 14, "state": 43, 
"state_dict": 43, "statist": [10, 43], "std": [28, 37, 52], "stderr": 25, "stdout": 43, "stds": 38, "stds0": 38, "stds1": 38, "stds2": 38, "stds3": 38, "steak": 53, "stem": [14, 25], "step": [3, 10, 12, 13, 19, 39, 43, 46], "step1": [10, 43], "step_": 43, "step_loss": 43, "step_siz": 38, "steps_per_print": [10, 43], "still": 53, "stinkhorn": 52, "stop": 34, "stop_stag": 64, "store": [43, 53], "store_tru": [10, 43], "str": [10, 13, 43, 55], "strategi": 24, "stream": [6, 15, 43, 55, 56], "streamhandl": 43, "stride": 38, "string": [37, 43, 49, 55], "strip": 28, "structur": [13, 43], "style": 43, "subfold": 43, "subject": 37, "subprocess": 43, "subtl": 53, "success": 7, "sudo": 7, "sugar": 53, "suggest": [43, 52], "suitabl": 43, "sum": [10, 38, 43], "summari": [34, 43], "summary_typ": 43, "suno": 55, "super": [10, 39, 43], "support": [10, 14, 33, 43], "sure": [43, 53, 57], "sve": 13, "swin2sr": 55, "switch": 43, "sylveon": 3, "sympi": [7, 30, 33, 51], "sync": [15, 43], "sync_gradi": 43, "syncgetdevmsg": 15, "synchron": 15, "sys": 43, "system": [9, 13, 14, 43, 58], "system_info": [13, 67], "system_instruct": 24, "tablet": 55, "tag": 43, "take": [43, 53], "tar": 52, "target": [10, 12, 13, 36, 43, 66], "target_dtyp": [10, 43], "task": [6, 14, 24, 25, 43, 55, 56, 58, 67], "task1": 24, "task2": 24, "tdrz": 67, "team": 43, "telechat": 15, "telemetri": 43, "temp": [7, 13], "temperatur": [13, 58], "templat": [14, 19], "temporarili": 43, "tensor": [10, 13, 15, 31, 38, 43], "tensorboard": [31, 43], "tensorfloat": 43, "tensorflow": [0, 28, 43], "term": 37, "termin": 58, "test": [6, 10, 12, 14, 22, 36, 43, 45, 52, 53, 56], "test_batch_s": [10, 43], "test_train": 53, "testing_util": 43, "testload": [10, 43], "testset": [10, 43], "text": [3, 6, 13, 31, 43, 53, 55, 58, 64], "text2imag": 43, "text_column_nam": 43, "text_encod": 43, "text_featur": 31, "text_gflop": 31, "text_label": 64, "text_mparam": 31, "text_prob": 31, "text_to_imag": 3, "text_width": 31, "textclassificationpipelin": 55, "tf32": 43, "tfs_z": 13, "than": [6, 43, 53], "that": [6, 9, 10, 13, 14, 43, 52, 53, 55], "the": [6, 9, 10, 14, 15, 24, 30, 33, 37, 39, 43, 49, 51, 52, 53, 55, 56, 61], "their": [53, 57], "them": [10, 43], "then": [10, 18, 43, 53], "there": 43, "these": 43, "they": [33, 43, 53], "thing": [10, 43], "third_parti": 15, "this": [6, 10, 13, 22, 24, 37, 43, 49, 53, 55, 56, 61], "those": [43, 53], "thread": 67, "through": 43, "throw": 43, "tidi": 58, "tiger": 22, "time": [6, 9, 10, 13, 43, 52, 67], "timestamp": [34, 67], "timestep": 43, "timm": [52, 63], "tini": 67, "tiny_vit_21m_512": 52, "tip": 58, "tmp": 43, "to": [2, 3, 6, 9, 10, 12, 13, 14, 15, 31, 36, 37, 38, 39, 43, 49, 52, 53, 55, 56, 57, 58, 61, 67], "togeth": 43, "tok_logg": 43, "token": [6, 13, 31, 34, 43, 53, 55, 56, 58, 61, 62, 67], "token_typ": 13, "tokenization_utils_bas": 43, "tokenize_capt": 43, "tokenize_funct": [43, 53], "tokenized_dataset": [43, 53], "tokenizer_config": 6, "tokenizer_kwarg": 43, "tokenizer_nam": 43, "tokenizers_parallel": 6, "too": 43, "took": 53, "tool": [7, 64], "tool_cal": 14, "toolkit": [7, 12, 17, 22, 24, 27, 42], "top": [10, 37, 43], "top1": 52, "top1_err": 52, "top5": 52, "top5_err": 52, "top_k": [10, 13, 34, 43], "top_p": [13, 34, 58], "topk": 52, "torch": [2, 3, 5, 9, 10, 15, 17, 28, 29, 31, 32, 34, 38, 39, 42, 43, 45, 48, 50, 53, 56, 58, 60, 63], "torch_dtyp": [2, 3, 43, 53, 58], "torch_npu": [5, 9, 15, 30, 31, 38, 39, 42, 43, 45, 48, 51, 53, 56, 58, 60, 61, 63], "torch_util": 43, "torchaudio": [45, 63], "torchrun": [15, 18, 19, 31], 
"torchvis": [2, 10, 43, 45], "total": [10, 13, 18, 36, 43, 67], "total_batch_s": 43, "total_flo": 53, "total_length": 43, "total_pr": 43, "totensor": [10, 43], "toymodel": 39, "tp": 22, "tpus": 43, "tqdm": 43, "traceback": 15, "track": 43, "tracker": 43, "tracker_config": 43, "tracker_project_nam": 43, "train": [6, 10, 14, 15, 18, 19, 31, 34, 43, 52, 53, 56, 61, 64], "train_batch_s": [3, 10, 43], "train_data_dir": 43, "train_dataload": 43, "train_dataset": [43, 53], "train_dir": 3, "train_fil": 43, "train_loss": [43, 53], "train_result": 43, "train_runtim": 53, "train_sampl": 43, "train_samples_per_second": 53, "train_steps_per_second": 53, "train_text_to_imag": 43, "train_text_to_image_lora_sdxl": 3, "train_transform": 43, "trainabl": 43, "traincoco": 31, "trainer": [43, 61], "trainer_log": 14, "trainer_util": 43, "training_arg": [43, 53], "training_data": [10, 43], "training_loss": 53, "training_util": 43, "trainingargu": [43, 53], "trainload": [10, 43], "trainoutput": 53, "trainset": [10, 43], "tran": 38, "transcrib": [55, 67], "transfom": 56, "transform": [0, 6, 10, 14, 15, 17, 19, 25, 41, 49, 53, 55, 58, 60, 61], "transformer_infer": 9, "transpos": [28, 38], "transpose_box": 38, "transpose_first": 38, "tree": [55, 67], "tri": [6, 10, 15, 43], "trigger": 61, "trl": [17, 59, 61], "truck": [10, 43], "true": [6, 10, 19, 22, 24, 28, 31, 34, 38, 39, 43, 49, 53, 55, 58, 64], "truncat": [34, 43, 53], "trust": 43, "trust_remote_cod": [24, 34, 43], "truth": 43, "ts": [49, 61], "tsd": 15, "tsinghua": [2, 5, 6, 7, 14, 17, 18, 19, 30, 33, 48, 51, 56, 60], "tuna": [2, 5, 6, 7, 14, 17, 18, 19, 30, 33, 48, 51, 56, 60], "tune": 43, "tupl": 43, "turbomind": 21, "tutori": 43, "tweak": 43, "two": [53, 55], "txt": [6, 28, 34, 43, 45, 63, 64], "type": [10, 13, 17, 34, 37, 43, 52, 67], "typical_p": 13, "typing_extens": [7, 30, 33, 51], "u32": 13, "ubuntu": [7, 13], "ubuntu22": 5, "ui": 0, "uint16": 38, "uint32": 38, "uint64": 38, "uint8": 38, "ukplab": 49, "unam": 7, "uncas": 56, "under": [24, 43, 61], "understand": 43, "unet": 43, "unet2dconditionmodel": 43, "unet_ema": 43, "unfinished_sequ": 15, "uniform_": 38, "uninstal": [7, 17, 24, 56], "unit": 14, "unknown": [9, 13, 15, 43], "unless": [43, 53], "unreal": 3, "unsqueez": 31, "unwrap": 43, "unwrap_model": 43, "unzip": 7, "up": [13, 17, 43, 55], "updat": [39, 43], "update_from_str": 43, "upgrad": [24, 61], "upload": 43, "upload_fold": 43, "upscal": 55, "upscaled_img": 55, "urgent": 55, "url": [43, 45, 55, 57], "us": [10, 43, 55], "us_foreign_polici": 25, "usag": [7, 24, 43], "use": [6, 10, 13, 14, 18, 19, 24, 33, 43, 49, 52, 56, 61, 67], "use_8bit_adam": 43, "use_cach": 24, "use_cann": 27, "use_ema": 43, "use_fast": 43, "use_fast_token": 43, "use_lock": 38, "use_modelscope_hub": 19, "use_nesterov": 38, "use_residu": [10, 43], "user": [7, 14, 55, 58, 61], "useradd": 7, "usermod": 7, "userwarn": 61, "usr": [5, 7, 12, 15, 17, 22, 27, 43], "usual": 33, "utf": [39, 43], "util": [10, 14, 15, 31, 43], "uvicorn": 14, "v0": [0, 10, 43], "v1": [0, 2, 14, 38, 43, 46, 67], "v2": [38, 43, 49, 67], "v3": [13, 67], "v4": 0, "v_in": 38, "v_out": 38, "v_predict": 43, "v_proj": 19, "vae": [3, 43], "vae_nam": 3, "val": [31, 52], "val2017": 55, "val_imgs_grid": 43, "val_siz": 19, "valid": [6, 10, 14, 43, 52, 53], "validation_epoch": 43, "validation_fil": 43, "validation_prompt": [3, 43], "validation_split_percentag": 43, "valu": [10, 13, 25, 43], "valueerror": 43, "var": [38, 43], "var_in": 38, "var_out": 38, "variabl": 6, "variant": 43, "varieti": 14, "various": [52, 53], 
"vb": 3, "ve": 53, "venv": 45, "verbos": 24, "veri": 43, "version": [7, 9, 12, 13, 14, 15, 17, 22, 25, 30, 33, 34, 43, 51, 61, 63], "vhulk2211": 17, "via": [18, 43], "view": [10, 43], "virolog": 25, "virtual": [15, 33], "visit": 14, "vit": [31, 55], "vl": 22, "vllm": 33, "vlms": 21, "vocab": [6, 13, 43], "vocab_on": 13, "vocab_s": 13, "vqa": 55, "vsx": [13, 67], "w4a16": 22, "wait": [14, 15, 53], "wait_for_everyon": 43, "waitrsp": 15, "wall_clock_breakdown": [10, 43], "wandb": 43, "wandb_arg": 24, "wandb_info": 43, "wandb_run_url": 43, "want": [43, 57], "warmup": [31, 43, 52], "warmup_max_lr": [10, 43], "warmup_min_lr": [10, 43], "warmup_num_step": [10, 43], "warmup_step": 19, "warmuplr": [10, 43], "warn": [6, 19, 24, 43, 49], "warranti": 43, "was": [43, 53], "wasm_simd": [13, 67], "watch": 53, "wav": [64, 67], "wav_id": 64, "wav_path": 64, "way": 43, "wd": 31, "we": [10, 43], "wear": 46, "web": [0, 13], "webchat": 14, "websit": [12, 13], "webui": [16, 46], "weight": [6, 38, 43, 52, 56], "weight_decay": [10, 38, 43], "weight_dtyp": 43, "well": 43, "wenet": 0, "wer": 64, "were": [6, 56], "wget": 45, "wh_ratio_clip": 38, "what": [14, 43, 55, 58, 67], "wheel": [7, 9, 30, 33, 51], "when": [10, 43, 53], "where": [43, 53], "whether": 43, "which": [43, 61], "while": 6, "whisper": 0, "whisper_backend_init_gpu": 67, "whisper_init_from_file_with_params_no_st": 67, "whisper_init_st": 67, "whisper_init_with_params_no_st": 67, "whisper_model_load": 67, "whisper_print_tim": 67, "whl": 45, "who": [53, 58], "whole": 43, "whose": 52, "wide": 14, "width": 46, "will": [9, 10, 43, 53, 55, 61], "window": [10, 43], "with": [3, 6, 10, 13, 15, 17, 28, 31, 33, 37, 38, 43, 55, 58], "with_caroten": 36, "with_debug": 36, "with_ipp": 36, "with_transform": 43, "without": [43, 57], "wizard": 53, "word": [34, 55], "work": [22, 43, 53], "work_dir": 22, "workaround": 43, "worker": 31, "workload": [14, 61], "world": [10, 43], "world_religion": 25, "world_siz": 39, "would": 43, "write": 43, "write_out": 24, "writer": 43, "written": 43, "wrong": 61, "www": 43, "x0": 38, "x1": 38, "x2": 55, "x6agfqsjhkhrulu8efso5qde8kriptk5rgh9olrtmeas3sid": 57, "x86": [7, 42], "x_0": 43, "xformer": [43, 45], "xformers_vers": 43, "xl": 3, "xla_spawn": 43, "xxx": [7, 17, 36, 43], "xywh": 38, "y0": 38, "y1": 38, "y2": 38, "yaml": [14, 17, 18], "ydshieh": 55, "year": 53, "yell": 53, "yelp": 53, "yelp_review_ful": 53, "yet": 53, "you": [6, 10, 22, 43, 49, 56, 57, 58, 61, 67], "your": [9, 13, 14, 31, 43, 57, 58, 61, 67], "your_env_nam": [51, 56], "your_path": [14, 18, 19], "yum": 7, "zephyr": 55, "zero": [10, 38, 43], "zero3_init_context_manag": 43, "zero_grad": 43, "zero_optim": [10, 43], "zeroshot": 31, "zip": [43, 45], "zlib": 7, "zlib1g": 7, "zoo": 28, "zsh": 45}, "titles": ["\u6607\u817e\u5f00\u6e90", "Diffusers", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "Accelerate", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "\u5feb\u901f\u5b89\u88c5\u6607\u817e\u73af\u5883", "DeepSpeed", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "llama.cpp", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "\u5168\u6d41\u7a0b\u6607\u817e\u5b9e\u8df5", "FAQ", "LLaMA-Factory", "\u5b89\u88c5\u6307\u5357", "\u5355\u673a\u591a\u5361\u5fae\u8c03", "\u5feb\u901f\u5f00\u59cb", "LMDeploy", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "LM-Evalution-Harness", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "ONNX Runtime", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "open_clip", "\u5b89\u88c5\u6307\u5357", 
"\u5feb\u901f\u5f00\u59cb", "OpenCompass", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "OpenCV", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "API\u8bf4\u660e", "\u529f\u80fd\u6837\u4f8b", "FAQ", "PyTorch", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "Stable-Diffusion-WebUI", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "Sentence Transformers", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "timm", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "\u5fae\u8c03\u9884\u8bad\u7ec3\u6a21\u578b", "Transformers", "\u63a8\u7406", "\u5b89\u88c5\u6307\u5357", "\u6a21\u578b\u83b7\u53d6", "\u5feb\u901f\u5f00\u59cb", "Transformer Reinforcement Learning", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "WeNet", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb", "Whisper.cpp", "\u5b89\u88c5\u6307\u5357", "\u5feb\u901f\u5f00\u59cb"], "titleterms": {"acceler": [4, 5], "api": [14, 38], "automodelforcausallm": 58, "benchmark": 14, "board": 14, "cann": 22, "cmvn": 64, "conda": 45, "cpp": [11, 12, 65, 66], "ddp": 39, "deepspe": [8, 9, 10, 43], "diffus": [1, 2, 43, 44, 45], "driver": 22, "evalu": 24, "evalut": 23, "factori": [16, 17], "faq": [15, 40], "firmwar": 22, "fsdp": 39, "git": 57, "har": [23, 24], "hf": 57, "huggingfac": 57, "learn": 59, "lfs": 57, "llama": [11, 12, 16, 17], "llm": 22, "lm": [23, 24], "lm_deploy": 21, "lmdeploy": 20, "lora": [3, 14, 18, 19], "meta": 57, "miniconda": 45, "mirror": 57, "npu": [18, 30, 31, 33, 51, 52], "onnx": [26, 27], "open_clip": [29, 30], "opencompass": [32, 33, 34], "opencv": [35, 36], "pip": [2, 21, 27], "pipelin": [55, 58], "py": 34, "python": [2, 30, 33, 36, 37, 51, 63], "pytorch": [41, 42], "reinforc": 59, "run": 34, "runtim": [26, 27], "sentenc": [47, 48], "server": 14, "sft": 14, "stabl": [44, 45], "timm": [50, 51], "token": 64, "torch": [30, 33, 51, 52], "trainer": 53, "transform": [43, 47, 48, 54, 56, 59], "trl": 60, "vlm": 22, "webui": [14, 44, 45], "wenet": [62, 63, 64], "whisper": [65, 66, 67], "yaml": 19}}) \ No newline at end of file diff --git a/sources/Diffusers/index.html b/sources/Diffusers/index.html new file mode 100644 index 0000000..20472d0 --- /dev/null +++ b/sources/Diffusers/index.html @@ -0,0 +1,158 @@ + + + + + + + + + Diffusers — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/Diffusers/install.html b/sources/Diffusers/install.html new file mode 100644 index 0000000..9bd1c8d --- /dev/null +++ b/sources/Diffusers/install.html @@ -0,0 +1,189 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 Diffusers & 昇腾的开发者,帮助完成昇腾环境下 Diffusers 的安装。

+
+

昇腾环境安装

+

请根据已有昇腾产品型号及CPU架构等按照 快速安装昇腾环境指引 进行昇腾环境安装,或直接获取对应产品的昇腾环境镜像 ascendai/cann

+
+

警告

+

CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

+
+
+
+

Diffusers 安装

+
+
+

Python 环境创建

+
1# 创建名为 diffusers 的 python 3.10 的虚拟环境
+2conda create -y -n diffusers python=3.10
+3# 激活虚拟环境
+4conda activate diffusers
+
+
+
+
+

pip 安装

+

通过以下指令安装 Diffusers 及 torch-npu:

+
1pip install diffusers torch==2.2.0 torch-npu==2.2.0 torchvision -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+
+

安装校验

+

执行以下代码,若无任何报错,仅打印模型下载过程,即说明安装成功:

+
1from diffusers import DiffusionPipeline
+2import torch
+3
+4pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
+5pipeline.to("npu")
+
+
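若需进一步确认 NPU 设备本身已被 PyTorch 正确识别,可参考以下检查脚本(示意代码,假设已按上文安装 torch 及 torch_npu,设备名 npu:0 仅为示例):
import torch
import torch_npu  # 导入后才会注册 NPU 后端

# 检查 NPU 是否可用,并打印可见的设备数量
print("NPU available:", torch.npu.is_available())
print("NPU device count:", torch.npu.device_count())

# 在 NPU 上做一次简单的张量运算,验证算子可以正常下发
x = torch.randn(2, 3).to("npu:0")
print((x + x).cpu())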
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/Diffusers/quick_start.html b/sources/Diffusers/quick_start.html new file mode 100644 index 0000000..b415929 --- /dev/null +++ b/sources/Diffusers/quick_start.html @@ -0,0 +1,231 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 Diffusers !

+
+

本示例以 Diffusers 库中的文生图任务为样例,展示如何对文生图模型 stable-diffusion-xl-base-1.0 进行基于 LoRA 的微调,以及动态合并 LoRA 权重的推理。

+
+

文生图

+
+

模型及数据集下载

+
    +
  1. 请提前下载 stabilityai/stable-diffusion-xl-base-1.0 模型至自定义路径
  2. 请提前下载 madebyollin/sdxl-vae-fp16-fix 模型至自定义路径
  3. 请提前下载 reach-vb/pokemon-blip-captions 数据集至自定义路径(下载方式可参考本节末尾的示例脚本)
+
+
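以下为使用 huggingface_hub 的 snapshot_download 将上述模型及数据集下载到自定义路径的示意脚本(local_dir 仅为示例路径,请按实际情况调整;国内网络可先设置环境变量 HF_ENDPOINT=https://hf-mirror.com):
from huggingface_hub import snapshot_download

# 将两个模型与数据集分别下载到自定义路径(路径仅为示例)
snapshot_download("stabilityai/stable-diffusion-xl-base-1.0",
                  local_dir="./models_ckpt/stable-diffusion-xl-base-1.0")
snapshot_download("madebyollin/sdxl-vae-fp16-fix",
                  local_dir="./ckpt/sdxl-vae-fp16-fix")
snapshot_download("reach-vb/pokemon-blip-captions", repo_type="dataset",
                  local_dir="./data/pokemon-blip-captions")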
+

基于 LoRA 的微调

+

进入 Diffusers 项目目录,新建并执行以下脚本:

+
+

备注

+

请根据 模型及数据集下载 中模型及数据集的实际缓存路径,指定 stable-diffusion-xl-base-1.0 模型缓存路径 MODEL_NAME、sdxl-vae-fp16-fix 模型缓存路径 VAE_NAME,以及 pokemon-blip-captions 数据集缓存路径 DATASET_NAME。

+
+
 1export MODEL_NAME="./models_ckpt/stable-diffusion-xl-base-1.0/"
+ 2export VAE_NAME="./ckpt/sdxl-vae-fp16-fix"
+ 3export DATASET_NAME="~/diffusers/data/pokemon-blip-captions/pokemon"
+ 4
+ 5python3  ./examples/text_to_image/train_text_to_image_lora_sdxl.py \
+ 6    --pretrained_model_name_or_path=$MODEL_NAME \
+ 7    --pretrained_vae_model_name_or_path=$VAE_NAME \
+ 8    --dataset_name=$DATASET_NAME --caption_column="text" \
+ 9    --resolution=1024 \
+10    --random_flip \
+11    --train_batch_size=1 \
+12    --num_train_epochs=2 \
+13    --checkpointing_steps=500 \
+14    --learning_rate=1e-04 \
+15    --lr_scheduler="constant" \
+16    --lr_warmup_steps=0 \
+17    --mixed_precision="no" \
+18    --seed=42 \
+19    --output_dir="sd-pokemon-model-lora-sdxl" \
+20    --validation_prompt="cute dragon creature"
+
+
+

微调过程无报错,并且终端显示 Steps: 100% 的进度条说明微调成功。

+
+
+

动态合并 LoRA 的推理

+
+

备注

+

请根据 模型及数据集下载 中模型实际缓存路径指定 model_path

+

根据 基于 LoRA 的微调 中指定的 LoRA 模型路径 output_dir 指定 lora_model_path

+

[可选] 修改 prompt 可改变生成图像的内容

+
+
 1from diffusers import DiffusionPipeline
+ 2import torch
+ 3
+ 4lora_model_path = "path/to/sd-pokemon-model-lora-sdxl/checkpoint-800/"
+ 5model_path = "./models_ckpt/stable-diffusion-xl-base-1.0/"
+ 6pipe = DiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
+ 7
+ 8# 将模型放到 NPU 上
+ 9pipe.to("npu")
+10
+11# 加载 LoRA 权重
+12pipe.load_lora_weights(lora_model_path)
+13# 输入 prompt
+14prompt = "Sylveon Pokemon with elegant features, magical design, \
+15        light purple aura, extremely detailed and intricate markings, \
+16        photo realistic, unreal engine, octane render"
+17# 推理
+18image = pipe(prompt, num_inference_steps=30, guidance_scale=7.5).images[0]
+19
+20image.save("pokemon-finetuned-inference-generation.png")
+
+
+

推理过程无报错,并且终端显示 Loading pipeline components...: 100% 的进度条,说明模型及 LoRA 权重加载成功。查看当前目录下保存的 pokemon-finetuned-inference-generation.png 图像,若图像内容与 prompt 相符,即说明推理成功。

+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/accelerate/index.html b/sources/accelerate/index.html new file mode 100644 index 0000000..e36baa1 --- /dev/null +++ b/sources/accelerate/index.html @@ -0,0 +1,155 @@ + + + + + + + + + Accelerate — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Accelerate

+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/accelerate/install.html b/sources/accelerate/install.html new file mode 100644 index 0000000..2dc8d94 --- /dev/null +++ b/sources/accelerate/install.html @@ -0,0 +1,172 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 Accelerate & 昇腾的开发者,帮助完成昇腾环境下 Accelerate 的安装。

+
+

Accelerate 下载安装

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境! +或者直接使用具备昇腾环境的镜像 ascendai/cann:8.0.rc1-910b-ubuntu22.04, +更多的版本可至 ascendai/cann 获取。

+
+
+

启动镜像

+
docker run -itd --network host -v /usr/local/dcmi:/usr/local/dcmi -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi -v /usr/local/Ascend/driver:/usr/local/Ascend/driver -v /etc/ascend_install.info:/etc/ascend_install.info --device /dev/davinci7 --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc --shm-size 16G --name accelerate ascendai/cann:8.0.rc1-910b-ubuntu22.04 bash
+
+
+
+
+

安装 Accelerate 及依赖包

+
pip install torch==2.2.0 torch_npu==2.2.0 accelerate -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
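安装完成后,可参考以下脚本确认 Accelerate 能正确识别到 NPU 设备(示意代码,假设 torch 与 torch_npu 已正确安装,预期打印的设备为 npu):
import torch
import torch_npu  # 注册 NPU 后端
from accelerate import Accelerator

accelerator = Accelerator()
# 正常情况下应打印 npu 设备,例如 npu:0
print("accelerator device:", accelerator.device)
print("npu available:", torch.npu.is_available())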
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/accelerate/quick_start.html b/sources/accelerate/quick_start.html new file mode 100644 index 0000000..3ce48a8 --- /dev/null +++ b/sources/accelerate/quick_start.html @@ -0,0 +1,206 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装指南 准备好昇腾环境及 Accelerate !

+
+

本教程以一个简单的 NLP 模型为例,讲述如何使用 Accelerate 在昇腾 NPU 上进行模型的训练。

+
+

前置准备

+

本篇将使用到 HuggingFace 的其他工具链及 scikit-learn 库,请使用以下指令安装:

+
pip install datasets evaluate transformers scikit-learn -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+

本篇样例代码为 Accelerate 官方样例,需提前进行下载:

+
git clone https://github.com/huggingface/accelerate.git
+
+
+
+
+

模型训练

+
1# 替换HF域名,方便国内用户进行数据及模型的下载
+2export HF_ENDPOINT=https://hf-mirror.com
+3# 进入项目目录
+4cd accelerate/examples
+5# 模型训练
+6python nlp_example.py
+
+
+

出现如下日志代表训练成功:

+
Downloading builder script: 5.75kB [00:01, 3.69kB/s]
+tokenizer_config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████| 49.0/49.0 [00:00<00:00, 237kB/s]
+config.json: 570B [00:00, 2.23MB/s]
+vocab.txt: 79.5kB [00:12, 3.45kB/s]Error while downloading from https://hf-mirror.com/bert-base-cased/resolve/main/vocab.txt: HTTPSConnectionPool(host='hf-mirror.com', port=443): Read timed out.
+Trying to resume download...
+vocab.txt: 213kB [00:07, 15.5kB/s]]
+vocab.txt: 91.4kB [00:32, 2.81kB/s]
+tokenizer.json: 436kB [00:19, 22.8kB/s]
+Downloading readme: 35.3kB [00:01, 26.4kB/s]
+Downloading data: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 649k/649k [00:02<00:00, 288kB/s]
+Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████| 75.7k/75.7k [00:00<00:00, 77.8kB/s]
+Downloading data: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 308k/308k [00:01<00:00, 204kB/s]
+Generating train split: 100%|███████████████████████████████████████████████████████████████████████████| 3668/3668 [00:00<00:00, 27701.23 examples/s]
+Generating validation split: 100%|████████████████████████████████████████████████████████████████████████| 408/408 [00:00<00:00, 73426.42 examples/s]
+Generating test split: 100%|███████████████████████████████████████████████████████████████████████████| 1725/1725 [00:00<00:00, 246370.91 examples/s]
+Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3668/3668 [00:01<00:00, 3378.05 examples/s]
+Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 408/408 [00:00<00:00, 3553.72 examples/s]
+Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 1725/1725 [00:00<00:00, 5109.03 examples/s]
+model.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 436M/436M [02:42<00:00, 2.68MB/s]
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+  - Avoid using `tokenizers` before the fork if possible
+  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
+epoch 0: {'accuracy': 0.8014705882352942, 'f1': 0.8439306358381503}
+epoch 1: {'accuracy': 0.8578431372549019, 'f1': 0.8975265017667845}
+epoch 2: {'accuracy': 0.8700980392156863, 'f1': 0.9087779690189329}
+
+
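上面的 nlp_example.py 封装了完整的数据处理与评估流程,其在 NPU 上训练的核心只是用 Accelerator 接管设备放置与反向传播。下面给出一个最小化的训练循环示意(示意代码,使用随机构造的数据与线性模型,仅用于说明 prepare 与 backward 的用法,假设 torch_npu 已安装):
import torch
import torch_npu  # 注册 NPU 后端,Accelerator 会自动选择 npu 设备
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from accelerate import Accelerator

accelerator = Accelerator()

# 随机构造的二分类数据,仅作演示
dataset = TensorDataset(torch.randn(64, 16), torch.randint(0, 2, (64,)))
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

model = nn.Linear(16, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

# prepare 会把模型、优化器、数据加载器放到 NPU 上,并在多卡场景下自动完成分布式封装
model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)

model.train()
for epoch in range(2):
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        loss = loss_fn(model(inputs), labels)
        accelerator.backward(loss)  # 用 accelerator.backward 替代 loss.backward
        optimizer.step()
    accelerator.print(f"epoch {epoch}: loss={loss.item():.4f}")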
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/ascend/quick_install.html b/sources/ascend/quick_install.html new file mode 100644 index 0000000..2db3d2b --- /dev/null +++ b/sources/ascend/quick_install.html @@ -0,0 +1,369 @@ + + + + + + + + + 快速安装昇腾环境 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速安装昇腾环境

+

跟随指导,在您的机器上快速安装昇腾环境。

+
+

1. 系统要求

+
+

1.1 前置检查

+

确认昇腾AI处理器已经安装妥当

+
lspci | grep 'Processing accelerators'
+
+
+

确认操作系统架构及版本

+
uname -m && cat /etc/*release
+
+
+

确认Python版本

+
python --version
+
+
+
+
+

1.2 软件要求

+ + + + + + + + + + + + + + +

软件与版本要求如下:
操作系统:openEuler 20.03/22.03, Ubuntu 20.04/22.04
Python:3.8, 3.9, 3.10

+
+
+
+

2. 环境安装

+

根据您的需求,选择合适的软件包版本:

+
+

警告

+

以下安装操作需要使用非 root 用户进行

+
+ +
+
+
+
可选的配置项如下:
安装方式:直接安装 / Docker
操作系统:openEuler / Ubuntu
CPU架构:x86-64 / aarch64
NPU型号:Atlas 800T A2 训练卡 / Atlas 300I Pro 推理卡
昇腾套件版本:Driver / Firmware
+
+
+
+ +
+
+

3. 卸载

+

卸载驱动

+
sudo /usr/local/Ascend/driver/script/uninstall.sh
+
+
+

卸载固件

+
sudo /usr/local/Ascend/firmware/script/uninstall.sh
+
+
+

卸载CANN-toolkit

+
~/Ascend/ascend-toolkit/<cann_version>/{arch}-linux/script/uninstall.sh
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/deepspeed/index.html b/sources/deepspeed/index.html new file mode 100644 index 0000000..bbc05ff --- /dev/null +++ b/sources/deepspeed/index.html @@ -0,0 +1,158 @@ + + + + + + + + + DeepSpeed — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/sources/deepspeed/install.html b/sources/deepspeed/install.html new file mode 100644 index 0000000..86a58bf --- /dev/null +++ b/sources/deepspeed/install.html @@ -0,0 +1,210 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+
+

备注

+

在本示例之前,请确保已经安装了 昇腾环境PyTorch 环境。

+
+
+

1. 安装DeepSpeed

+

安装DeepSpeed最简单的方式是通过 pip

+
1pip install deepspeed
+
+
+
+
+

2. 通过源码安装

+

GitHub 克隆DeepSpeed项目后,可以通过 pip 来通过源码编译。

+
1pip install .
+
+
+
+
+

3. 预编译DeepSpeed算子(可选)

+

如果不想使用JIT编译模式,而想要预编译DeepSpeed算子,可以通过设置环境变量的方式完成算子的预编译。

+
1DS_BUILD_OPS=1 pip install deepspeed
+
+
+
+
+

4. 安装验证

+

安装完成后,可以通过 ds_report 命令查看安装结果

+
 1--------------------------------------------------
+ 2DeepSpeed C++/CUDA extension op report
+ 3--------------------------------------------------
+ 4NOTE: Ops not installed will be just-in-time (JIT) compiled at
+ 5    runtime if needed. Op compatibility means that your system
+ 6    meet the required dependencies to JIT install the op.
+ 7--------------------------------------------------
+ 8JIT compiled ops requires ninja
+ 9ninja .................. [OKAY]
+10--------------------------------------------------
+11op name ................ installed .. compatible
+12--------------------------------------------------
+13deepspeed_not_implemented  [NO] ....... [OKAY]
+14async_io ............... [NO] ....... [OKAY]
+15cpu_adagrad ............ [NO] ....... [OKAY]
+16cpu_adam ............... [NO] ....... [OKAY]
+17cpu_lion ............... [NO] ....... [OKAY]
+18fused_adam ............. [NO] ....... [OKAY]
+19transformer_inference .. [NO] ....... [OKAY]
+20--------------------------------------------------
+21DeepSpeed general environment info:
+22torch install path ............... ['/root/miniconda3/envs/ds/lib/python3.10/site-packages/torch']
+23torch version .................... 2.2.0
+24deepspeed install path ........... ['/root/miniconda3/envs/ds/lib/python3.10/site-packages/deepspeed']
+25deepspeed info ................... 0.14.4, unknown, unknown
+26deepspeed wheel compiled w. ...... torch 2.2
+27torch_npu install path ........... ['/root/miniconda3/envs/ds/lib/python3.10/site-packages/torch_npu']
+28torch_npu version ................ 2.2.0
+29ascend_cann version .............. 8.0.RC2.alpha002
+30shared memory (/dev/shm) size .... 20.00 GB
+
+
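除 ds_report 外,也可以在 Python 中确认 DeepSpeed 已将昇腾 NPU 识别为当前加速器,参考以下示意代码(假设 deepspeed 与 torch_npu 均已安装,NPU 环境下 device_name 预期返回 npu):
import torch
import torch_npu  # 注册 NPU 后端
import deepspeed
from deepspeed.accelerator import get_accelerator

# get_accelerator 返回当前检测到的加速器抽象,NPU 环境下 device_name 预期为 "npu"
accelerator = get_accelerator()
print("deepspeed version:", deepspeed.__version__)
print("accelerator:", accelerator.device_name())
print("device count:", accelerator.device_count())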
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/deepspeed/quick_start.html b/sources/deepspeed/quick_start.html new file mode 100644 index 0000000..6ce41b1 --- /dev/null +++ b/sources/deepspeed/quick_start.html @@ -0,0 +1,577 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

在本示例之前,请确保已经安装了 DeepSpeed 环境。 如果还未安装,可以执行 pip install deepspeed 完成安装。

+
+
+

1. 使用DeepSpeed多卡并行训练

+

以下代码使用 CIFAR-10 数据集,基于 DeepSpeed 在多张 NPU 卡上进行模型训练(来自 DeepSpeed Examples)。自 DeepSpeed v0.12.6 起,代码无需任何修改即可自动检测 NPU 并完成训练。

+
  1import argparse
+  2import os
+  3
+  4import deepspeed
+  5import torch
+  6import torch.nn as nn
+  7import torch.nn.functional as F
+  8import torchvision
+  9import torchvision.transforms as transforms
+ 10from deepspeed.accelerator import get_accelerator
+ 11from deepspeed.moe.utils import split_params_into_different_moe_groups_for_optimizer
+ 12
+ 13
+ 14def add_argument():
+ 15    parser = argparse.ArgumentParser(description="CIFAR")
+ 16
+ 17    # For train.
+ 18    parser.add_argument(
+ 19        "-e",
+ 20        "--epochs",
+ 21        default=30,
+ 22        type=int,
+ 23        help="number of total epochs (default: 30)",
+ 24    )
+ 25    parser.add_argument(
+ 26        "--local_rank",
+ 27        type=int,
+ 28        default=-1,
+ 29        help="local rank passed from distributed launcher",
+ 30    )
+ 31    parser.add_argument(
+ 32        "--log-interval",
+ 33        type=int,
+ 34        default=2000,
+ 35        help="output logging information at a given interval",
+ 36    )
+ 37
+ 38    # For mixed precision training.
+ 39    parser.add_argument(
+ 40        "--dtype",
+ 41        default="fp16",
+ 42        type=str,
+ 43        choices=["bf16", "fp16", "fp32"],
+ 44        help="Datatype used for training",
+ 45    )
+ 46
+ 47    # For ZeRO Optimization.
+ 48    parser.add_argument(
+ 49        "--stage",
+ 50        default=0,
+ 51        type=int,
+ 52        choices=[0, 1, 2, 3],
+ 53        help="Datatype used for training",
+ 54    )
+ 55
+ 56    # For MoE (Mixture of Experts).
+ 57    parser.add_argument(
+ 58        "--moe",
+ 59        default=False,
+ 60        action="store_true",
+ 61        help="use deepspeed mixture of experts (moe)",
+ 62    )
+ 63    parser.add_argument(
+ 64        "--ep-world-size", default=1, type=int, help="(moe) expert parallel world size"
+ 65    )
+ 66    parser.add_argument(
+ 67        "--num-experts",
+ 68        type=int,
+ 69        nargs="+",
+ 70        default=[
+ 71            1,
+ 72        ],
+ 73        help="number of experts list, MoE related.",
+ 74    )
+ 75    parser.add_argument(
+ 76        "--mlp-type",
+ 77        type=str,
+ 78        default="standard",
+ 79        help="Only applicable when num-experts > 1, accepts [standard, residual]",
+ 80    )
+ 81    parser.add_argument(
+ 82        "--top-k", default=1, type=int, help="(moe) gating top 1 and 2 supported"
+ 83    )
+ 84    parser.add_argument(
+ 85        "--min-capacity",
+ 86        default=0,
+ 87        type=int,
+ 88        help="(moe) minimum capacity of an expert regardless of the capacity_factor",
+ 89    )
+ 90    parser.add_argument(
+ 91        "--noisy-gate-policy",
+ 92        default=None,
+ 93        type=str,
+ 94        help="(moe) noisy gating (only supported with top-1). Valid values are None, RSample, and Jitter",
+ 95    )
+ 96    parser.add_argument(
+ 97        "--moe-param-group",
+ 98        default=False,
+ 99        action="store_true",
+100        help="(moe) create separate moe param groups, required when using ZeRO w. MoE",
+101    )
+102
+103    # Include DeepSpeed configuration arguments.
+104    parser = deepspeed.add_config_arguments(parser)
+105
+106    args = parser.parse_args()
+107
+108    return args
+109
+110
+111def create_moe_param_groups(model):
+112    """Create separate parameter groups for each expert."""
+113    parameters = {"params": [p for p in model.parameters()], "name": "parameters"}
+114    return split_params_into_different_moe_groups_for_optimizer(parameters)
+115
+116
+117def get_ds_config(args):
+118    """Get the DeepSpeed configuration dictionary."""
+119    ds_config = {
+120        "train_batch_size": 16,
+121        "steps_per_print": 2000,
+122        "optimizer": {
+123            "type": "Adam",
+124            "params": {
+125                "lr": 0.001,
+126                "betas": [0.8, 0.999],
+127                "eps": 1e-8,
+128                "weight_decay": 3e-7,
+129            },
+130        },
+131        "scheduler": {
+132            "type": "WarmupLR",
+133            "params": {
+134                "warmup_min_lr": 0,
+135                "warmup_max_lr": 0.001,
+136                "warmup_num_steps": 1000,
+137            },
+138        },
+139        "gradient_clipping": 1.0,
+140        "prescale_gradients": False,
+141        "bf16": {"enabled": args.dtype == "bf16"},
+142        "fp16": {
+143            "enabled": args.dtype == "fp16",
+144            "fp16_master_weights_and_grads": False,
+145            "loss_scale": 0,
+146            "loss_scale_window": 500,
+147            "hysteresis": 2,
+148            "min_loss_scale": 1,
+149            "initial_scale_power": 15,
+150        },
+151        "wall_clock_breakdown": False,
+152        "zero_optimization": {
+153            "stage": args.stage,
+154            "allgather_partitions": True,
+155            "reduce_scatter": True,
+156            "allgather_bucket_size": 50000000,
+157            "reduce_bucket_size": 50000000,
+158            "overlap_comm": True,
+159            "contiguous_gradients": True,
+160            "cpu_offload": False,
+161        },
+162    }
+163    return ds_config
+164
+165
+166class Net(nn.Module):
+167    def __init__(self, args):
+168        super(Net, self).__init__()
+169        self.conv1 = nn.Conv2d(3, 6, 5)
+170        self.pool = nn.MaxPool2d(2, 2)
+171        self.conv2 = nn.Conv2d(6, 16, 5)
+172        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+173        self.fc2 = nn.Linear(120, 84)
+174        self.moe = args.moe
+175        if self.moe:
+176            fc3 = nn.Linear(84, 84)
+177            self.moe_layer_list = []
+178            for n_e in args.num_experts:
+179                # Create moe layers based on the number of experts.
+180                self.moe_layer_list.append(
+181                    deepspeed.moe.layer.MoE(
+182                        hidden_size=84,
+183                        expert=fc3,
+184                        num_experts=n_e,
+185                        ep_size=args.ep_world_size,
+186                        use_residual=args.mlp_type == "residual",
+187                        k=args.top_k,
+188                        min_capacity=args.min_capacity,
+189                        noisy_gate_policy=args.noisy_gate_policy,
+190                    )
+191                )
+192            self.moe_layer_list = nn.ModuleList(self.moe_layer_list)
+193            self.fc4 = nn.Linear(84, 10)
+194        else:
+195            self.fc3 = nn.Linear(84, 10)
+196
+197    def forward(self, x):
+198        x = self.pool(F.relu(self.conv1(x)))
+199        x = self.pool(F.relu(self.conv2(x)))
+200        x = x.view(-1, 16 * 5 * 5)
+201        x = F.relu(self.fc1(x))
+202        x = F.relu(self.fc2(x))
+203        if self.moe:
+204            for layer in self.moe_layer_list:
+205                x, _, _ = layer(x)
+206            x = self.fc4(x)
+207        else:
+208            x = self.fc3(x)
+209        return x
+210
+211
+212def test(model_engine, testset, local_device, target_dtype, test_batch_size=4):
+213    """Test the network on the test data.
+214
+215    Args:
+216        model_engine (deepspeed.runtime.engine.DeepSpeedEngine): the DeepSpeed engine.
+217        testset (torch.utils.data.Dataset): the test dataset.
+218        local_device (str): the local device name.
+219        target_dtype (torch.dtype): the target datatype for the test data.
+220        test_batch_size (int): the test batch size.
+221
+222    """
+223    # The 10 classes for CIFAR10.
+224    classes = (
+225        "plane",
+226        "car",
+227        "bird",
+228        "cat",
+229        "deer",
+230        "dog",
+231        "frog",
+232        "horse",
+233        "ship",
+234        "truck",
+235    )
+236
+237    # Define the test dataloader.
+238    testloader = torch.utils.data.DataLoader(
+239        testset, batch_size=test_batch_size, shuffle=False, num_workers=0
+240    )
+241
+242    # For total accuracy.
+243    correct, total = 0, 0
+244    # For accuracy per class.
+245    class_correct = list(0.0 for i in range(10))
+246    class_total = list(0.0 for i in range(10))
+247
+248    # Start testing.
+249    model_engine.eval()
+250    with torch.no_grad():
+251        for data in testloader:
+252            images, labels = data
+253            if target_dtype != None:
+254                images = images.to(target_dtype)
+255            outputs = model_engine(images.to(local_device))
+256            _, predicted = torch.max(outputs.data, 1)
+257            # Count the total accuracy.
+258            total += labels.size(0)
+259            correct += (predicted == labels.to(local_device)).sum().item()
+260
+261            # Count the accuracy per class.
+262            batch_correct = (predicted == labels.to(local_device)).squeeze()
+263            for i in range(test_batch_size):
+264                label = labels[i]
+265                class_correct[label] += batch_correct[i].item()
+266                class_total[label] += 1
+267
+268    if model_engine.local_rank == 0:
+269        print(
+270            f"Accuracy of the network on the {total} test images: {100 * correct / total : .0f} %"
+271        )
+272
+273        # For all classes, print the accuracy.
+274        for i in range(10):
+275            print(
+276                f"Accuracy of {classes[i] : >5s} : {100 * class_correct[i] / class_total[i] : 2.0f} %"
+277            )
+278
+279
+280def main(args):
+281    # Initialize DeepSpeed distributed backend.
+282    deepspeed.init_distributed()
+283    _local_rank = int(os.environ.get("LOCAL_RANK"))
+284    get_accelerator().set_device(_local_rank)
+285
+286    ########################################################################
+287    # Step1. Data Preparation.
+288    #
+289    # The output of torchvision datasets are PILImage images of range [0, 1].
+290    # We transform them to Tensors of normalized range [-1, 1].
+291    #
+292    # Note:
+293    #     If running on Windows and you get a BrokenPipeError, try setting
+294    #     the num_worker of torch.utils.data.DataLoader() to 0.
+295    ########################################################################
+296    transform = transforms.Compose(
+297        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+298    )
+299
+300    if torch.distributed.get_rank() != 0:
+301        # Might be downloading cifar data, let rank 0 download first.
+302        torch.distributed.barrier()
+303
+304    # Load or download cifar data.
+305    trainset = torchvision.datasets.CIFAR10(
+306        root="./data", train=True, download=True, transform=transform
+307    )
+308    testset = torchvision.datasets.CIFAR10(
+309        root="./data", train=False, download=True, transform=transform
+310    )
+311
+312    if torch.distributed.get_rank() == 0:
+313        # Cifar data is downloaded, indicate other ranks can proceed.
+314        torch.distributed.barrier()
+315
+316    ########################################################################
+317    # Step 2. Define the network with DeepSpeed.
+318    #
+319    # First, we define a Convolution Neural Network.
+320    # Then, we define the DeepSpeed configuration dictionary and use it to
+321    # initialize the DeepSpeed engine.
+322    ########################################################################
+323    net = Net(args)
+324
+325    # Get list of parameters that require gradients.
+326    parameters = filter(lambda p: p.requires_grad, net.parameters())
+327
+328    # If using MoE, create separate param groups for each expert.
+329    if args.moe_param_group:
+330        parameters = create_moe_param_groups(net)
+331
+332    # Initialize DeepSpeed to use the following features.
+333    #   1) Distributed model.
+334    #   2) Distributed data loader.
+335    #   3) DeepSpeed optimizer.
+336    ds_config = get_ds_config(args)
+337    model_engine, optimizer, trainloader, __ = deepspeed.initialize(
+338        args=args,
+339        model=net,
+340        model_parameters=parameters,
+341        training_data=trainset,
+342        config=ds_config,
+343    )
+344
+345    # Get the local device name (str) and local rank (int).
+346    local_device = get_accelerator().device_name(model_engine.local_rank)
+347    local_rank = model_engine.local_rank
+348
+349    # For float32, target_dtype will be None so no datatype conversion needed.
+350    target_dtype = None
+351    if model_engine.bfloat16_enabled():
+352        target_dtype = torch.bfloat16
+353    elif model_engine.fp16_enabled():
+354        target_dtype = torch.half
+355
+356    # Define the Classification Cross-Entropy loss function.
+357    criterion = nn.CrossEntropyLoss()
+358
+359    ########################################################################
+360    # Step 3. Train the network.
+361    #
+362    # This is when things start to get interesting.
+363    # We simply have to loop over our data iterator, and feed the inputs to the
+364    # network and optimize. (DeepSpeed handles the distributed details for us!)
+365    ########################################################################
+366
+367    for epoch in range(args.epochs):  # loop over the dataset multiple times
+368        running_loss = 0.0
+369        for i, data in enumerate(trainloader):
+370            # Get the inputs. ``data`` is a list of [inputs, labels].
+371            inputs, labels = data[0].to(local_device), data[1].to(local_device)
+372
+373            # Try to convert to target_dtype if needed.
+374            if target_dtype != None:
+375                inputs = inputs.to(target_dtype)
+376
+377            outputs = model_engine(inputs)
+378            loss = criterion(outputs, labels)
+379
+380            model_engine.backward(loss)
+381            model_engine.step()
+382
+383            # Print statistics
+384            running_loss += loss.item()
+385            if local_rank == 0 and i % args.log_interval == (
+386                args.log_interval - 1
+387            ):  # Print every log_interval mini-batches.
+388                print(
+389                    f"[{epoch + 1 : d}, {i + 1 : 5d}] loss: {running_loss / args.log_interval : .3f}"
+390                )
+391                running_loss = 0.0
+392    print("Finished Training")
+393
+394    ########################################################################
+395    # Step 4. Test the network on the test data.
+396    ########################################################################
+397    test(model_engine, testset, local_device, target_dtype)
+398
+399
+400if __name__ == "__main__":
+401    args = add_argument()
+402    main(args)
+
+
+
+
+

2. Viewing the training results

+

After training completes, the script prints the model's classification accuracy on the CIFAR-10 test images.

+
 1Finished Training
+ 2Accuracy of the network on the 10000 test images:  57 %
+ 3Accuracy of plane :  65 %
+ 4Accuracy of   car :  67 %
+ 5Accuracy of  bird :  52 %
+ 6Accuracy of   cat :  34 %
+ 7Accuracy of  deer :  52 %
+ 8Accuracy of   dog :  49 %
+ 9Accuracy of  frog :  59 %
+10Accuracy of horse :  66 %
+11Accuracy of  ship :  66 %
+12Accuracy of truck :  56 %
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/llama_cpp/index.html b/sources/llama_cpp/index.html new file mode 100644 index 0000000..d8cde21 --- /dev/null +++ b/sources/llama_cpp/index.html @@ -0,0 +1,156 @@ + + + + + + + + + llama.cpp — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/llama_cpp/install.html b/sources/llama_cpp/install.html new file mode 100644 index 0000000..2c0d1dc --- /dev/null +++ b/sources/llama_cpp/install.html @@ -0,0 +1,250 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Installation Guide

+

This tutorial is intended for developers using llama.cpp with Ascend NPUs and walks through installing llama.cpp in an Ascend environment.

+
+

Note

+

llama.cpp currently supports only Atlas 300T A2 devices.

+
+
+

Downloading and installing llama.cpp

+

Two installation methods are provided here, building from source and Docker; choose whichever suits your needs:

+ +
+
+
+
+
安装方式
+
源码安装
+
Docker
+
+
+
+

+
+

Installing from source

+
+

Note

+

Make sure you have installed the matching CANN-toolkit version, firmware, and drivers by following the Quick Ascend Environment Setup guide, and that the CANN-toolkit environment variables have been applied.

+
+
+

Tip

+

The minimum CANN version supported by llama.cpp is 8.0.RC1. When installing CANN, also install the Kernel operator package.

+
+ +

Get the source code

+
+

Fetch the source with the following git commands:

+
+
git clone https://github.com/ggerganov/llama.cpp
+cd llama.cpp
+
+
+ +

Build llama.cpp

+
+
+
cmake -B build -DGGML_CANN=on -DCMAKE_BUILD_TYPE=release
+cmake --build build --config release
+
+
+ +
+ +
+
+

Using Docker

+
+

Note

+

Make sure you have installed the matching firmware and drivers by following the Quick Ascend Environment Setup guide.

+
+
+

Tip

+

For more CANN base image options, see ascendai/cann.

+
+

Build the Docker image:

+
+
git clone https://github.com/ggerganov/llama.cpp
+cd llama.cpp
+docker build -t llama-cpp-cann -f .devops/llama-cli-cann.Dockerfile .
+
+

Check the status of all NPU cards:

+
+
npu-smi info
+
+

Start the Docker container:

+
+
docker run --name llamacpp \
+--device /dev/davinci0  \
+--device /dev/davinci_manager \
+--device /dev/devmm_svm \
+--device /dev/hisi_hdc \
+-v /usr/local/dcmi:/usr/local/dcmi \
+-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
+-v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
+-v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
+-v /PATH_TO_YOUR_MODELS/:/app/models \
+-it llama-cpp-cann -m /app/models/MODEL_PATH -ngl 32 \
+-p "Building a website can be done in 10 simple steps:"
+
+
+
+
+

Verifying the installation

+

If the build finishes without any error messages, the installation succeeded. Part of the expected output is shown below:

+
1[ 97%] Built target test-grammar-integration
+2[ 97%] Built target llama-speculative
+3[ 97%] Built target llama-perplexity
+4[ 98%] Linking CXX executable ../../bin/llama-bench
+5[ 98%] Linking CXX executable ../bin/test-json-schema-to-grammar
+6[ 98%] Built target llama-bench
+7[ 98%] Built target test-json-schema-to-grammar
+8[100%] Linking CXX executable ../../bin/llama-server
+9[100%] Built target llama-server
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/llama_cpp/quick_start.html b/sources/llama_cpp/quick_start.html new file mode 100644 index 0000000..92314dc --- /dev/null +++ b/sources/llama_cpp/quick_start.html @@ -0,0 +1,325 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Quick Start

+
+

Note

+

Before reading this guide, make sure the Ascend environment and llama.cpp are ready as described in the Installation Guide!

+
+

This tutorial focuses on inference with large language models (LLMs), using the Qwen2.5-7B model as an example to show how to run inference with llama.cpp on Ascend NPUs.

+
+

Model preparation and quantization

+

llama.cpp inference requires model files in GGUF format; llama.cpp offers two ways to convert Hugging Face model files:

+
    +
  • Use GGUF-my-repo to convert the model online.

  • Use the convert_hf_to_gguf.py script in the repository to convert a Hugging Face model to GGUF:

    +
    +
    1python convert_hf_to_gguf.py path/to/model
    +
    +
    +
    +
  • +
+

For details, see Prepare and Quantize.

+

Note: only FP16 precision and Q4_0/Q8_0 quantized models are currently supported.

+
+
+

推理

+

Two device-selection modes are available:

  • Single device: use one device target specified by the user.

  • Multiple devices: automatically select all devices with the same backend.

Device selection      Parameter

Single device         --split-mode none --main-gpu DEVICE_ID

Multiple devices      --split-mode layer (default)

+
+

Single-card inference

+
1./build/bin/llama-cli -m path_to_model -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0
+
+
+
+
+

Multi-card inference

+
1./build/bin/llama-cli -m path_to_model -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer
+
+
+

The following shows the output of a normal inference run:

+
  1Log start
+  2main: build = 3520 (8e707118)
+  3main: built with cc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 for aarch64-linux-gnu
+  4main: seed  = 1728907816
+  5llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from /home/jiahao/models/llama3-8b-instruct-fp16.gguf (version GGUF V3 (latest))
+  6llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+  7llama_model_loader: - kv   0:                       general.architecture str              = llama
+  8llama_model_loader: - kv   1:                               general.name str              = Meta-Llama-3-8B-Instruct
+  9llama_model_loader: - kv   2:                          llama.block_count u32              = 32
+ 10llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
+ 11llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
+ 12llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
+ 13llama_model_loader: - kv   6:                 llama.attention.head_count u32              = 32
+ 14llama_model_loader: - kv   7:              llama.attention.head_count_kv u32              = 8
+ 15llama_model_loader: - kv   8:                       llama.rope.freq_base f32              = 500000.000000
+ 16llama_model_loader: - kv   9:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010
+ 17llama_model_loader: - kv  10:                          general.file_type u32              = 1
+ 18llama_model_loader: - kv  11:                           llama.vocab_size u32              = 128256
+ 19llama_model_loader: - kv  12:                 llama.rope.dimension_count u32              = 128
+ 20llama_model_loader: - kv  13:                       tokenizer.ggml.model str              = gpt2
+ 21llama_model_loader: - kv  14:                         tokenizer.ggml.pre str              = llama-bpe
+ 22llama_model_loader: - kv  15:                      tokenizer.ggml.tokens arr[str,128256]  = ["!", "\"", "#", "$", "%", "&", "'", ...
+ 23llama_model_loader: - kv  16:                  tokenizer.ggml.token_type arr[i32,128256]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+ 24llama_model_loader: - kv  17:                      tokenizer.ggml.merges arr[str,280147]  = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
+ 25llama_model_loader: - kv  18:                tokenizer.ggml.bos_token_id u32              = 128000
+ 26llama_model_loader: - kv  19:                tokenizer.ggml.eos_token_id u32              = 128009
+ 27llama_model_loader: - kv  20:                    tokenizer.chat_template str              = {% set loop_messages = messages %}{% ...
+ 28llama_model_loader: - kv  21:               general.quantization_version u32              = 2
+ 29llama_model_loader: - type  f32:   65 tensors
+ 30llama_model_loader: - type  f16:  226 tensors
+ 31llm_load_vocab: special tokens cache size = 256
+ 32llm_load_vocab: token to piece cache size = 0.8000 MB
+ 33llm_load_print_meta: format           = GGUF V3 (latest)
+ 34llm_load_print_meta: arch             = llama
+ 35llm_load_print_meta: vocab type       = BPE
+ 36llm_load_print_meta: n_vocab          = 128256
+ 37llm_load_print_meta: n_merges         = 280147
+ 38llm_load_print_meta: vocab_only       = 0
+ 39llm_load_print_meta: n_ctx_train      = 8192
+ 40llm_load_print_meta: n_embd           = 4096
+ 41llm_load_print_meta: n_layer          = 32
+ 42llm_load_print_meta: n_head           = 32
+ 43llm_load_print_meta: n_head_kv        = 8
+ 44llm_load_print_meta: n_rot            = 128
+ 45llm_load_print_meta: n_swa            = 0
+ 46llm_load_print_meta: n_embd_head_k    = 128
+ 47llm_load_print_meta: n_embd_head_v    = 128
+ 48llm_load_print_meta: n_gqa            = 4
+ 49llm_load_print_meta: n_embd_k_gqa     = 1024
+ 50llm_load_print_meta: n_embd_v_gqa     = 1024
+ 51llm_load_print_meta: f_norm_eps       = 0.0e+00
+ 52llm_load_print_meta: f_norm_rms_eps   = 1.0e-05
+ 53llm_load_print_meta: f_clamp_kqv      = 0.0e+00
+ 54llm_load_print_meta: f_max_alibi_bias = 0.0e+00
+ 55llm_load_print_meta: f_logit_scale    = 0.0e+00
+ 56llm_load_print_meta: n_ff             = 14336
+ 57llm_load_print_meta: n_expert         = 0
+ 58llm_load_print_meta: n_expert_used    = 0
+ 59llm_load_print_meta: causal attn      = 1
+ 60llm_load_print_meta: pooling type     = 0
+ 61llm_load_print_meta: rope type        = 0
+ 62llm_load_print_meta: rope scaling     = linear
+ 63llm_load_print_meta: freq_base_train  = 500000.0
+ 64llm_load_print_meta: freq_scale_train = 1
+ 65llm_load_print_meta: n_ctx_orig_yarn  = 8192
+ 66llm_load_print_meta: rope_finetuned   = unknown
+ 67llm_load_print_meta: ssm_d_conv       = 0
+ 68llm_load_print_meta: ssm_d_inner      = 0
+ 69llm_load_print_meta: ssm_d_state      = 0
+ 70llm_load_print_meta: ssm_dt_rank      = 0
+ 71llm_load_print_meta: model type       = 8B
+ 72llm_load_print_meta: model ftype      = F16
+ 73llm_load_print_meta: model params     = 8.03 B
+ 74llm_load_print_meta: model size       = 14.96 GiB (16.00 BPW)
+ 75llm_load_print_meta: general.name     = Meta-Llama-3-8B-Instruct
+ 76llm_load_print_meta: BOS token        = 128000 '<|begin_of_text|>'
+ 77llm_load_print_meta: EOS token        = 128009 '<|eot_id|>'
+ 78llm_load_print_meta: LF token         = 128 'Ä'
+ 79llm_load_print_meta: EOT token        = 128009 '<|eot_id|>'
+ 80llm_load_print_meta: max token length = 256
+ 81llm_load_tensors: ggml ctx size =    0.27 MiB
+ 82llm_load_tensors:        CPU buffer size = 15317.02 MiB
+ 83llm_load_tensors:       CANN buffer size = 13313.00 MiB
+ 84.........................................................................................
+ 85llama_new_context_with_model: n_ctx      = 8192
+ 86llama_new_context_with_model: n_batch    = 2048
+ 87llama_new_context_with_model: n_ubatch   = 512
+ 88llama_new_context_with_model: flash_attn = 0
+ 89llama_new_context_with_model: freq_base  = 500000.0
+ 90llama_new_context_with_model: freq_scale = 1
+ 91llama_kv_cache_init:       CANN KV buffer size =  1024.00 MiB
+ 92llama_new_context_with_model: KV self size  = 1024.00 MiB, K (f16):  512.00 MiB, V (f16):  512.00 MiB
+ 93llama_new_context_with_model:        CPU  output buffer size =     0.49 MiB
+ 94llama_new_context_with_model:       CANN compute buffer size =  1260.50 MiB
+ 95llama_new_context_with_model:        CPU compute buffer size =    24.01 MiB
+ 96llama_new_context_with_model: graph nodes  = 1030
+ 97llama_new_context_with_model: graph splits = 4
+ 98
+ 99system_info: n_threads = 192 / 192 | AVX = 0 | AVX_VNNI = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 0 | NEON = 1 | SVE = 0 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 |
+100sampling:
+101    repeat_last_n = 64, repeat_penalty = 1.000, frequency_penalty = 0.000, presence_penalty = 0.000
+102    top_k = 40, tfs_z = 1.000, top_p = 0.950, min_p = 0.050, typical_p = 1.000, temp = 0.800
+103    mirostat = 0, mirostat_lr = 0.100, mirostat_ent = 5.000
+104sampling order:
+105CFG -> Penalties -> top_k -> tfs_z -> typical_p -> top_p -> min_p -> temperature
+106generate: n_ctx = 8192, n_batch = 2048, n_predict = -1, n_keep = 1
+107
+108
+109Building a website can be done in 10 simple steps: 1. Define your website's purpose and target audience 2. Choose a domain name and register it with a registrar 3. Select a web hosting service and set up your hosting account 4. Design your website's layout and structure 5. Create content for your website, including text, images, and other media 6. Build a responsive website design that adapts to different devices and screen sizes 7. Choose a Content Management System (CMS) and install it on your website 8. Customize your website's design and layout using a CMS
+110
+111llama_print_timings:        load time =    9074.69 ms
+112llama_print_timings:      sample time =      31.97 ms /   112 runs   (    0.29 ms per token,  3503.28 tokens per second)
+113llama_print_timings: prompt eval time =     238.53 ms /    13 tokens (   18.35 ms per token,    54.50 tokens per second)
+114llama_print_timings:        eval time =   13152.29 ms /   111 runs   (  118.49 ms per token,     8.44 tokens per second)
+115llama_print_timings:       total time =   13623.53 ms /   124 tokens
+
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/llamafactory/example.html b/sources/llamafactory/example.html new file mode 100644 index 0000000..3cccb9d --- /dev/null +++ b/sources/llamafactory/example.html @@ -0,0 +1,371 @@ + + + + + + + + + 全流程昇腾实践 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

End-to-End Practice on Ascend

+

Before starting this guide, read LLaMA-Factory QuickStart to learn about LLaMA-Factory and its main features, and follow the Installation Guide and Quick Start to complete the basic environment setup, install LLaMA-Factory, and run a simple fine-tuning and inference pass. Building on that, this guide uses the Qwen1.5-7B model as an example to help developers use more of LLaMA-Factory's practical features on Ascend NPUs.

+

LLaMA-Factory QuickStart explains the following 9 features in detail. This tutorial is an end-to-end walkthrough of them on NPUs; for detailed explanations of the features and parameters, refer to LLaMA-Factory QuickStart:

+
    +
  1. Direct inference with the original model

  2. Building a custom dataset

  3. LoRA-based SFT instruction fine-tuning

  4. Inference with dynamically merged LoRA weights

  5. Batch prediction and training-effect evaluation

  6. Merging and exporting the LoRA model

  7. Using the all-in-one webui board

  8. Starting and calling the API server

  9. Mainstream LLM evaluation benchmarks
+
+

前置准备

+
+

安装准备

+

请确认已按照 安装指南 安装 CANN 和 LLaMA-Factory 并完成安装校验。

+
+
+

配置文件准备

+

本示例中用到的参数配置文件与快速开始 qwen1_5_lora_sft_ds.yaml 中一致,可参考快速开始。

+
+
+
+

原始模型直接推理

+

验证 LLaMA-Factory 在昇腾 NPU 上推理功能是否正常:

+
1ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli webchat --model_name_or_path qwen/Qwen1.5-7B \
+2            --adapter_name_or_path saves/Qwen1.5-7B/lora/sft \
+3            --template qwen \
+4            --finetuning_type lora
+
+
+

如下图所示可正常进行对话,即为可正常推理:

+
+../../_images/webchat.png +
+
+
+

自定义数据集构建

+

This guide uses the identity and alpaca_en_demo datasets that ship with LLaMA-Factory. Applying the following global replacements to the identity dataset is enough to customize the assistant's identity:

+
    +
  • Replace {{name}} with Ascend-helper

  • Replace {{author}} with Ascend

For more on building custom datasets, see the official dataset construction guide.
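If you would rather script the replacement than edit the file by hand, a minimal sketch is shown below. It assumes the dataset file sits at data/identity.json inside your LLaMA-Factory checkout; adjust the path if yours differs.

from pathlib import Path

# Assumed location of the identity dataset inside the LLaMA-Factory checkout.
dataset_path = Path("data/identity.json")

text = dataset_path.read_text(encoding="utf-8")
# The two global replacements described above.
text = text.replace("{{name}}", "Ascend-helper").replace("{{author}}", "Ascend")
dataset_path.write_text(text, encoding="utf-8")
print("identity dataset customized")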

+
+
+

基于 LoRA 的 sft 指令微调

+

快速开始 中,已经尝试过使用 src/train.py 为入口的微调脚本,本篇中均使用 llamafactory-cli 命令启动微调、推理等程序。

+
1ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train <your_path>/qwen1_5_lora_sft_ds.yaml
+
+
+
+
+

动态合并 LoRA 的推理

+
1ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli chat --model_name_or_path qwen/Qwen1.5-7B \
+2            --adapter_name_or_path saves/Qwen1.5-7B/lora/sft \
+3            --template qwen \
+4            --finetuning_type lora
+
+
+

通过询问大模型是谁检验 sft 指令微调的成果,如下图,大模型回答自己是 Ascend-helper 说明 sft 成功,如失败,可返回 基于 LoRA 的 sft 指令微调 增加训练轮数重新训练。

+
+../../_images/sft-chat.gif +
+
+
+

Batch prediction and training-effect evaluation

+

Before running batch prediction and evaluation, install the jieba, rouge-chinese, and nltk libraries:

+
1pip install jieba rouge-chinese nltk -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+

Then run the following command to perform batch prediction and evaluation of the fine-tuned model on the alpaca_gpt4_zh and identity datasets:

+
 1ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli train \
+ 2            --stage sft \
+ 3            --do_predict \
+ 4            --model_name_or_path qwen/Qwen1.5-7B \
+ 5            --adapter_name_or_path ./saves/Qwen1.5-7B/lora/sft  \
+ 6            --dataset alpaca_gpt4_zh,identity \
+ 7            --dataset_dir ./data \
+ 8            --template qwen \
+ 9            --finetuning_type lora \
+10            --output_dir ./saves/Qwen1.5-7B/lora/predict \
+11            --overwrite_cache \
+12            --overwrite_output_dir \
+13            --cutoff_len 1024 \
+14            --preprocessing_num_workers 16 \
+15            --per_device_eval_batch_size 1 \
+16            --max_samples 20 \
+17            --predict_with_generate
+
+
+

After batch prediction and evaluation complete, the following files appear in the specified output directory:

+
all_results.json
+generated_predictions.jsonl
+predict_results.json
+trainer_log.jsonl
+
+
+

generated_predictions.jsonl contains, for every test sample, the input prompt, the reference label, and the model's prediction (predict); one example is shown below:

+
{"prompt": "system\nYou are a helpful assistant.\nuser\n保持健康的三个提示。\nassistant\n", "label": "以下是保持健康的三个提示:\n\n1. 保持身体活动。每天做适当的身体运动,如散步、跑步或游泳,能促进心血管健康,增强肌肉力量,并有助于减少体重。\n\n2. 均衡饮食。每天食用新鲜的蔬菜、水果、全谷物和脂肪含量低的蛋白质食物,避免高糖、高脂肪和加工食品,以保持健康的饮食习惯。\n\n3. 睡眠充足。睡眠对人体健康至关重要,成年人每天应保证 7-8 小时的睡眠。良好的睡眠有助于减轻压力,促进身体恢复,并提高注意力和记忆力。", "predict": "保持健康的三个提示包括:1. 均衡饮食:饮食应包括各种食物,如蔬菜、水果、全麦面包、蛋白质和健康脂肪,以满足身体的营养需求。\n2. 锻炼:每周至少进行150分钟的中等强度有氧运动,如快走、跑步、游泳或骑自行车,以及至少两次力量训练,以帮助维持身体健康。\n3. 睡眠:保持规律的睡眠习惯,每晚至少睡7-8小时,以帮助身体恢复和充电。"}
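Because each line of generated_predictions.jsonl is a standalone JSON object, the predictions are easy to inspect with a few lines of Python (a minimal sketch; the directory matches the --output_dir used above):

import json

# Output directory passed as --output_dir in the predict command above.
pred_file = "./saves/Qwen1.5-7B/lora/predict/generated_predictions.jsonl"

with open(pred_file, encoding="utf-8") as f:
    records = [json.loads(line) for line in f]

print(f"{len(records)} predictions loaded")
first = records[0]
for key in ("prompt", "label", "predict"):
    print(f"--- {key} ---")
    print(first[key])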
+
+
+

predict_results.json contains the resulting evaluation metrics:

+
{
+    "predict_bleu-4": 50.941235,
+    "predict_rouge-1": 65.7085975,
+    "predict_rouge-2": 52.576409999999996,
+    "predict_rouge-l": 60.487535,
+    "predict_runtime": 196.1634,
+    "predict_samples_per_second": 0.204,
+    "predict_steps_per_second": 0.204
+}
+
+
+
+
+

Merging and exporting the LoRA model

+

When merging and exporting a LoRA model, you can set the export_device parameter to auto so that the current accelerator environment is detected automatically and the NPU is used as the export device:

+
1ASCEND_RT_VISIBLE_DEVICES=0 llamafactory-cli export \
+2            --model_name_or_path qwen/Qwen1.5-7B \
+3            --adapter_name_or_path ./saves/Qwen1.5-7B/lora/sft  \
+4            --template qwen \
+5            --finetuning_type lora \
+6            --export_dir ./saves/Qwen1.5-7B/lora/merged-model-path \
+7            --export_size 2 \
+8            --export_device auto \
+9            --export_legacy_format False
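To sanity-check the exported weights, the merged directory can be loaded like any ordinary Hugging Face checkpoint. A minimal sketch, assuming torch-npu is installed and using the export_dir from the command above:

import torch
import torch_npu  # noqa: F401  (registers the "npu" device type with PyTorch)
from transformers import AutoModelForCausalLM, AutoTokenizer

merged_dir = "./saves/Qwen1.5-7B/lora/merged-model-path"  # export_dir used above

tokenizer = AutoTokenizer.from_pretrained(merged_dir)
model = AutoModelForCausalLM.from_pretrained(merged_dir, torch_dtype=torch.float16).to("npu:0")

inputs = tokenizer("Who are you?", return_tensors="pt").to("npu:0")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

If the SFT step above worked, the model should introduce itself as Ascend-helper.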
+
+
+
+
+

一站式 webui board 的使用

+

使用 webui 可零代码实现以上功能,启动命令如下:

+
1ASCEND_RT_VISIBLE_DEVICES=0 GRADIO_SHARE=0 GRADIO_SERVER_PORT=7007 GRADIO_SERVER_NAME="0.0.0.0" llamafactory-cli webui
+
+
+

在 webui 实现 Qwen1.5-7B 模型的 LoRA 模型微调、动态推理和模型导出的操作示例:

+
+
+

Starting and calling the API server

+

API_PORT is the port of the API service and can be replaced with a custom port. Start the API service with the following command:

+
1ASCEND_RT_VISIBLE_DEVICES=0 API_PORT=7007 llamafactory-cli api \
+2            --model_name_or_path qwen/Qwen1.5-7B \
+3            --adapter_name_or_path ./saves/Qwen1.5-7B/lora/sft \
+4            --template qwen \
+5            --finetuning_type lora
+
+
+

When the terminal prints the following key messages, Qwen1.5-7B can be called through the API in downstream tasks:

+
1Visit http://localhost:7007/docs for API document.
+2INFO:     Started server process [2261535]
+3INFO:     Waiting for application startup.
+4INFO:     Application startup complete.
+5INFO:     Uvicorn running on http://0.0.0.0:7007 (Press CTRL+C to quit)
+
+
+

Example code that calls Qwen1.5-7B through the API for question-answering chat; pass your question in via message:

+
 1import os
+ 2from openai import OpenAI
+ 3from transformers.utils.versions import require_version
+ 4
+ 5require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")
+ 6
+ 7if __name__ == '__main__':
+ 8    # change to your custom port
+ 9    port = 7007
+10    client = OpenAI(
+11        api_key="0",
+12        base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 7007)),
+13    )
+14    messages = []
+15    messages.append({"role": "user", "content": "hello, what is Ascend NPU"})
+16    result = client.chat.completions.create(messages=messages, model="test")
+17    print(result.choices[0].message)
+
+
+

On success, the terminal shows output like the following, in which Qwen1.5-7B correctly introduces the Ascend NPU:

+
ChatCompletionMessage(content='The Ascend NPU, or Neural Processing Unit, is an AI chip developed by Huawei that is designed to accelerate the performance of deep learning and artificial intelligence workloads. It is specifically designed to be energy-efficient, and is intended to be used in a wide range of devices, from smartphones to data centers. The Ascend NPU is designed to support a variety of AI workloads, including object detection, natural language processing, and speech recognition.', role='assistant', function_call=None, tool_calls=None)
+
+
+
+
+

进阶-大模型主流评测 benchmark

+

通过以下指令启动对 Qwen1.5-7B 模型在 mmlu 数据集的评测:

+
1llamafactory-cli eval \
+2    --model_name_or_path qwen/Qwen1.5-7B \
+3    --template fewshot \
+4    --task mmlu \
+5    --split validation \
+6    --lang en \
+7    --n_shot 5 \
+8    --batch_size 1
+
+
+

评测完成后,终端输出的评测结果如下,与 Qwen1.5-7B 官方报告对齐:

+
        Average: 61.79
+           STEM: 54.83
+Social Sciences: 73.00
+     Humanities: 55.02
+          Other: 67.32
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/llamafactory/faq.html b/sources/llamafactory/faq.html new file mode 100644 index 0000000..62e61a0 --- /dev/null +++ b/sources/llamafactory/faq.html @@ -0,0 +1,239 @@ + + + + + + + + + FAQ — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

FAQ

+
+

设备指定

+

Q:为什么我的 NPU 卡没调用起来?

+
    +
  1. 通过 ASCEND_RT_VISIBLE_DEVICES 环境变量指定昇腾 NPU 卡,如 ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 指定使用 0,1,2,3四张 NPU 卡进行微调/推理。

  2. +
+
+

提示

+

昇腾 NPU 卡从 0 开始编号,docker 容器内也是如此; +如映射物理机上的 6,7 号 NPU 卡到容器内使用,其对应的卡号分别为 0,1

+
+
    +
  1. 检查是否安装 torch-npu,建议通过 pip install -e '.[torch-npu,metrics]' 安装 LLaMA-Factory。

  2. +
+
+
+

推理报错

+

Q:使用昇腾 NPU 推理报错 RuntimeError: ACL stream synchronize failed, error code:507018

+

A:设置 do_sample: false,取消随机抽样策略

+

关联 issues:

+ +
+
+

微调/训练报错

+

Q:使用 ChatGLM 系列模型微调/训练模型时,报错 NotImplementedError: Unknown device for graph fuser

+

A:在 modelscope 或 huggingface 下载的 repo 里修改 modeling_chatglm.py 代码,取消 torch.jit 装饰器注释

+

关联 issues:

+ +

Q:微调/训练启动后,HCCL 报错,包含如下关键信息:

+
    RuntimeError: [ERROR] HCCL error in: torch_npu/csrc/distributed/ProcessGroupHCCL.cpp:64
+[ERROR] 2024-05-21-11:57:54 (PID:927000, Device:3, RankID:3) ERR02200 DIST call hccl api failed.
+EJ0001: 2024-05-21-11:57:54.167.645 Failed to initialize the HCCP process. Reason: Maybe the last training process is running.
+        Solution: Wait for 10s after killing the last training process and try again.
+        TraceBack (most recent call last):
+        tsd client wait response fail, device response code[1]. unknown device error.[FUNC:WaitRsp][FILE:process_mode_manager.cpp][LINE:290]
+        Fail to get sq reg virtual addr, deviceId=3, sqId=40.[FUNC:Setup][FILE:stream.cc][LINE:1102]
+        stream setup failed, retCode=0x7020010.[FUNC:SyncGetDevMsg][FILE:api_impl.cc][LINE:4643]
+        Sync get device msg failed, retCode=0x7020010.[FUNC:GetDevErrMsg][FILE:api_impl.cc][LINE:4704]
+        rtGetDevMsg execute failed, reason=[driver error:internal error][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
+
+
+

A:杀掉 device 侧所有进程,等待 10s 后重新启动训练。

+

关联 issues:

+ +

Q:使用 TeleChat 模型在昇腾 NPU 推理时,报错 AssertionError: Torch not compiled with CUDA enabled

+

A: This error is usually caused by CUDA-specific hard-coding in the model code. Based on the error message, locate the CUDA hard-coding and change it to the NPU equivalent, e.g. replace .cuda() with .npu() and .to("cuda") with .to("npu").
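An illustrative before/after sketch (not code taken from TeleChat itself):

import torch
import torch_npu  # noqa: F401  (required so the "npu" device type exists)

x = torch.randn(2, 3)

# Before: CUDA-specific calls that fail on Ascend NPU.
#   x = x.cuda()
#   x = x.to("cuda")

# After: the NPU equivalents.
x = x.npu()
x = x.to("npu")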

+

Q:模型微调遇到报错 DeviceType must be NPU. Actual DeviceType is: cpu,例如下列报错信息

+
File "/usr/local/pyenv/versions/3.10.13/envs/x/lib/python3.10/site-packages/transformers-4.41.1-py3.10.egg/transformers/generation/utils.py", line 1842, in generate
+    result = self._sample(
+File "/usr/local/pyenv/versions/3.10.13/envs/x/lib/python3.10/site-packages/transformers-4.41.1-py3.10.egg/transformers/generation/utils.py", line 2568, in _sample
+    next_tokens = next_tokens * unfinished_sequences + \
+RuntimeError: t == c10::DeviceType::PrivateUse1 INTERNAL ASSERT FAILED at "third_party/op-plugin/op_plugin/ops/base_ops/opapi/MulKernelNpuOpApi.cpp":26, please report a bug to PyTorch. DeviceType must be NPU. Actual DeviceType is: cpu
+[ERROR] 2024-05-29-17:04:48 (PID:70209, Device:0, RankID:-1) ERR00001 PTA invalid parameter
+
+
+

A: This kind of error usually means that some tensors were not moved to the NPU. Make sure all operands of the operator named in the error are on the NPU. In the error above, MulKernelNpuOpApi is a multiplication operator, so both next_tokens and unfinished_sequences must already be on the NPU.
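A minimal illustration of the fix, using stand-ins for the two operands from the stack trace above:

import torch
import torch_npu  # noqa: F401

next_tokens = torch.ones(4, dtype=torch.long)                 # accidentally left on the CPU
unfinished_sequences = torch.ones(4, dtype=torch.long).npu()  # already on the NPU

# Align the devices before mixing the two tensors; this avoids the
# "DeviceType must be NPU. Actual DeviceType is: cpu" assertion.
next_tokens = next_tokens.to(unfinished_sequences.device)
next_tokens = next_tokens * unfinished_sequences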

+

Q:单卡 NPU 情况下,使用 DeepSpeed 训练模型,报错 AttributeError :'GemmaForCausalLM'obiect has no attribute"save checkpoint",此处 GemmaForCausalLM 还可能为其他模型

+

A:此问题一般为使用 python src/train.py 启动训练脚本或使用 llamafactory-cli train 的同时设置环境变量 FORCE_TORCHRUN 为 false 或 0 时出现。 +由于 DeepSpeed 只对分布式 launcher 启动的程序中的模型用 DeepSpeedEngine 包装,包装后才有 save_checkpoint 等方法。 +因此使用 torchrun 启动训练即可解决问题,即:

+
torchrun --nproc_per_node $NPROC_PER_NODE \
+        --nnodes $NNODES \
+        --node_rank $RANK \
+        --master_addr $MASTER_ADDR \
+        --master_port $MASTER_PORT \
+        src/train.py
+
+
+

同时使用 llamafactory-cli train 和 DeepSpeed 时,LLaMA-Factory 将自动设置 FORCE_TORCHRUN 为 1,启动分布式训练。如果您的代码中没有这个功能,请更新 LLaMA-Factory 为最新代码。

+

关联 issue 及 PR:

+ +
+
+

问题反馈

+

如果您遇到任何问题,欢迎在 官方社区 提 issue,或在 LLAMA-Factory × 昇腾交流群 内提问,我们将第一时间进行响应。

+

持续更新中 ...

+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/llamafactory/index.html b/sources/llamafactory/index.html new file mode 100644 index 0000000..1b8d679 --- /dev/null +++ b/sources/llamafactory/index.html @@ -0,0 +1,187 @@ + + + + + + + + + LLaMA-Factory — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/sources/llamafactory/install.html b/sources/llamafactory/install.html new file mode 100644 index 0000000..43e7cb1 --- /dev/null +++ b/sources/llamafactory/install.html @@ -0,0 +1,293 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 LLAMA-Factory & 昇腾的开发者,帮助完成昇腾环境下 LLaMA-Factory 的安装。

+
+

LLAMA-Factory 下载安装

+

下载 LLAMA-Factory 并进入项目目录,本文档所有操作均在该目录下进行:

+
1git clone https://github.com/hiyouga/LLaMA-Factory.git
+2cd LLaMA-Factory
+
+
+

此处提供 docker 和 pip 两种安装方式,请按需选择:

+ +
+
+
+
+
安装方式
+
Docker
+
pip
+
+
+
+

+
+

使用 pip

+
+

备注

+

请确保已经根据快速安装昇腾环境指引安装了对应的CANN-toolkit版本以及相应的固件和驱动,并应用了CANN-toolkit环境变量。

+
+
+

警告

+

LLAMA-Factory 支持的 CANN 最低版本为 8.0.rc1。安装 CANN 时,请同时安装 Kernel 算子包。

+
+

Python 环境创建

+
+

创建并激活 Python 环境:

+
+
conda create -y -n llamafactory python=3.10
+conda activate llamafactory
+
+
+

LLaMA-Factory 安装

+
+

使用以下指令安装带有 torch-npu 的 LLaMA-Factory:

+
+
pip install -e ".[torch-npu,metrics]" -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+ +
+
+

使用 Docker

+
+

备注

+

Make sure you have installed the matching firmware and drivers by following the Quick Ascend Environment Setup guide.

+
+
+

提示

+

更多 CANN 的基础镜像选择见ascendai/cann

+
+

此处提供使用 docker-compose 构建及启动 docker 容器和不使用 docker-compose 两种构建方式,请根据需求选择其一。

+ +
+

使用 docker-compose 构建及启动 docker 容器

+ +

进入存放 Dockerfile 及 docker-compose.yaml 的 docker-npu 目录:

+
+
cd docker/docker-npu
+
+

构建 docker 镜像并启动 docker 容器:

+
+
docker-compose up -d
+
+ +

进入 docker 容器:

+
+
docker exec -it llamafactory bash
+
+ + +
+

不使用 docker-compose

+

构建 docker 镜像:

+
+
docker build -f ./docker/docker-npu/Dockerfile --build-arg INSTALL_DEEPSPEED=false --build-arg PIP_INDEX=https://pypi.org/simple -t llamafactory:latest .
+
+

启动 docker 容器:

+
+
docker run -dit \
+  -v ./hf_cache:/root/.cache/huggingface \
+  -v ./ms_cache:/root/.cache/modelscope \
+  -v ./data:/app/data \
+  -v ./output:/app/output \
+  -v /usr/local/dcmi:/usr/local/dcmi \
+  -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
+  -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
+  -v /etc/ascend_install.info:/etc/ascend_install.info \
+  -p 7860:7860 \
+  -p 8000:8000 \
+  --device /dev/davinci0 \
+  --device /dev/davinci_manager \
+  --device /dev/devmm_svm \
+  --device /dev/hisi_hdc \
+  --shm-size 16G \
+  --name llamafactory \
+  llamafactory:latest
+
+

进入 docker 容器:

+
+
docker exec -it llamafactory bash
+
+
+ +
+
+
+
+

安装校验

+

使用以下指令对 LLaMA-Factory × 昇腾的安装进行校验:

+
llamafactory-cli env
+
+
+

如下所示,正确显示 LLaMA-Factory、PyTorch NPU 和 CANN 版本号及 NPU 型号等信息即说明安装成功。

+
- `llamafactory` version: 0.8.2.dev0
+- Platform: Linux-4.19.90-vhulk2211.3.0.h1543.eulerosv2r10.aarch64-aarch64-with-glibc2.31
+- Python version: 3.10.14
+- PyTorch version: 2.1.0 (NPU)
+- Transformers version: 4.41.2
+- Datasets version: 2.19.2
+- Accelerate version: 0.31.0
+- PEFT version: 0.11.1
+- TRL version: 0.9.4
+- NPU type: xxx
+- CANN version: 8.0.RC2.alpha001
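Besides llamafactory-cli env, a quick way to confirm that PyTorch can actually reach the NPU is the following check (a minimal sketch; it assumes torch and torch-npu are installed in the same environment):

import torch
import torch_npu  # noqa: F401  (registers the NPU backend with PyTorch)

print(torch.npu.is_available())   # expected: True
print(torch.npu.device_count())   # number of visible NPU cards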
+
+
+
+
+

LLaMA-Factory 卸载

+
1pip uninstall llamafactory
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/llamafactory/multi_npu.html b/sources/llamafactory/multi_npu.html new file mode 100644 index 0000000..bf9483d --- /dev/null +++ b/sources/llamafactory/multi_npu.html @@ -0,0 +1,195 @@ + + + + + + + + + 单机多卡微调 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

单机多卡微调

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 LLaMA-Factory !

+
+

本篇为 快速开始 的进阶,同样首先安装 DeepSpeed 和 ModelScope:

+
pip install -e ".[deepspeed,modelscope]" -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+

多卡 NPU 指定

+

使用 export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 指定所需 NPU 卡号,此处为 0~3 四卡 NPU。

+
+

备注

+

昇腾 NPU 卡从 0 开始编号,docker 容器内也是如此;

+

如映射物理机上的 6,7 号 NPU 卡到容器内使用,其对应的卡号分别为 0,1

+
+

或使用以下脚本自动检测并指定多卡 NPU:

+
# ------------------------------ detect npu --------------------------------------
+# detect npu via npu-smi
+if command -v npu-smi info &> /dev/null; then
+  num_npus=$(npu-smi info -l | grep "Total Count" | awk -F ":" '{print $NF}')
+  npu_list=$(seq -s, 0 $((num_npus-1)))
+else
+  num_npus=-1
+  npu_list="-1"
+fi
+echo using npu : $npu_list
+num_gpus=$(echo $npu_list | awk -F "," '{print NF}')
+# --------------------------------------------------------------------------------
+export ASCEND_RT_VISIBLE_DEVICES=$npu_list
+
+
+
+
+

基于 LoRA 的模型多卡分布式微调

+

通过 ASCEND_RT_VISIBLE_DEVICES 变量指定多卡后,使用 torchrun 启动分布式训练,需指定 nproc_per_node 参数为 NPU 卡数量,其余参数配置与 快速开始 中单卡微调保持一致

+
torchrun --nproc_per_node $num_npus \
+    --nnodes 1 \
+    --node_rank 0 \
+    --master_addr 127.0.0.1 \
+    --master_port 7007 \
+    src/train.py <your_path>/qwen1_5_lora_sft_ds.yaml
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/llamafactory/quick_start.html b/sources/llamafactory/quick_start.html new file mode 100644 index 0000000..53de761 --- /dev/null +++ b/sources/llamafactory/quick_start.html @@ -0,0 +1,299 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 LLaMA-Factory !

+
+

本教程聚焦大语言模型(Large Language Model,LLM)的微调过程,以 Qwen1.5-7B 模型为例,讲述如何使用 LLaMA-Factory 在昇腾 NPU 上进行 LoRA 微调及推理。

+

本篇将使用到 DeepSpeed 和 ModelScope,请使用以下指令安装:

+
pip install -e ".[deepspeed,modelscope]" -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+

环境变量配置

+

通过环境变量设置单卡 NPU,并使用 ModelScope 下载模型/数据集:

+
export ASCEND_RT_VISIBLE_DEVICES=0
+export USE_MODELSCOPE_HUB=1
+
+
+
+
+

基于 LoRA 的模型微调

+
+

yaml 配置文件

+

在 LLAMA-Factory 目录下,创建如下 qwen1_5_lora_sft_ds.yaml:

+
+ 展开 qwen1_5_lora_sft_ds.yaml +
+
### model
+model_name_or_path: qwen/Qwen1.5-7B
+
+### method
+stage: sft
+do_train: true
+finetuning_type: lora
+lora_target: q_proj,v_proj
+
+### ddp
+ddp_timeout: 180000000
+deepspeed: examples/deepspeed/ds_z0_config.json
+
+### dataset
+dataset: identity,alpaca_en_demo
+template: qwen
+cutoff_len: 1024
+max_samples: 1000
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/Qwen1.5-7B/lora/sft
+logging_steps: 10
+save_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+
+### train
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 2
+learning_rate: 0.0001
+num_train_epochs: 3.0
+lr_scheduler_type: cosine
+warmup_steps: 0.1
+fp16: true
+
+### eval
+val_size: 0.1
+per_device_eval_batch_size: 1
+evaluation_strategy: steps
+eval_steps: 500
+    
+
+
+
+
+

开启微调

+

使用 torchrun 启动微调,微调涉及的所有参数均在 yaml 配置文件 中设置。

+
torchrun --nproc_per_node 1 \
+    --nnodes 1 \
+    --node_rank 0 \
+    --master_addr 127.0.0.1 \
+    --master_port 7007 \
+    src/train.py qwen1_5_lora_sft_ds.yaml
+
+
+
+

备注

+

nproc_per_node, nnodes, node_rank, master_addr, master_port 为 torchrun 所需参数,其详细含义可参考 PyTorch 官方文档

+
+

如正常输出模型加载、损失 loss 等日志,即说明成功微调。如需NPU 多卡分布式训练请参考 单机多卡微调

+
+
+
+

动态合并 LoRA 的推理

+

经 LoRA 微调后,通过 llamafactory-cli chat 使用微调后的模型进行推理,指定 adapter_name_or_path 参数为 LoRA 微调模型的存储路径:

+
llamafactory-cli chat --model_name_or_path qwen/Qwen1.5-7B \
+            --adapter_name_or_path saves/Qwen1.5-7B/lora/sft \
+            --template qwen \
+            --finetuning_type lora
+
+
+
+

备注

+

确保微调及推理阶段使用同一 prompt 模板 template

+
+

接下来即可在终端使用微调的模型进行问答聊天了!使用 Ctrl+C 或输入 exit 退出该问答聊天,如下图所示,为在 NPU 成功推理的样例:

+
+../../_images/chat-llamafactory.gif +
+
+

备注

+

第一轮问答会有一些 warning 告警,这是由于 transformers 库更新所致,不影响推理的正常运行,请忽略

+
+
+
+

完整脚本

+
+

推理及微调脚本

+

使用 Qwen1.5-7B 模型微调和推理的完整脚本如下:

+
# use modelscope
+export USE_MODELSCOPE_HUB=1
+
+# specify NPU
+export ASCEND_RT_VISIBLE_DEVICES=0
+
+### qwen/Qwen1.5-7B
+### finetune
+torchrun --nproc_per_node 1 \
+    --nnodes 1 \
+    --node_rank 0 \
+    --master_addr 127.0.0.1 \
+    --master_port 7007 \
+    src/train.py <your_path>/qwen1_5_lora_sft_ds.yaml
+
+### inference -- chat
+llamafactory-cli chat --model_name_or_path qwen/Qwen1.5-7B \
+            --adapter_name_or_path saves/Qwen1.5-7B/lora/sft \
+            --template qwen \
+            --finetuning_type lora
+
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/lm_deploy/index.html b/sources/lm_deploy/index.html new file mode 100644 index 0000000..e28cb5d --- /dev/null +++ b/sources/lm_deploy/index.html @@ -0,0 +1,160 @@ + + + + + + + + + LMDeploy — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/lm_deploy/install.html b/sources/lm_deploy/install.html new file mode 100644 index 0000000..70c8b68 --- /dev/null +++ b/sources/lm_deploy/install.html @@ -0,0 +1,185 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

LMDeploy 是一个用于大型语言模型(LLMs)和视觉-语言模型(VLMs)压缩、部署和服务的 Python 库。其核心推理引擎包括 TurboMind 引擎和 PyTorch 引擎, +前者由 C++ 和 CUDA 开发,致力于推理性能的优化,而后者纯 Python 开发,旨在降低开发者的门槛。

+

本教程面向使用 lm-deploy & 昇腾的开发者,帮助完成昇腾环境下 lm-deploy 的安装。

+
+

lm_deploy 下载安装

+
+

使用 pip 安装(推荐)

+

推荐在一个干净的 conda 环境下(python3.8 - 3.12),安装 lmdeploy :

+
1conda create -n lmdeploy python=3.8 -y
+2conda activate lmdeploy
+3pip install lmdeploy
+
+
+
+
+

从源码安装

+

如果你使用 PyTorch 引擎进行推理,从源代码安装非常简单:

+
1git clone https://github.com/InternLM/lmdeploy.git
+2cd lmdeploy
+3pip install -e .
+
+
+
+
+
+

安装校验

+

安装过程中未出现错误,且执行下面命令后出现 lmdeploy 版本号即为安装成功。

+
1python -c "import lmdeploy; print(lmdeploy.__version__)"
+2
+3# 以下为输出示例
+4# 0.6.2
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/lm_deploy/quick_start.html b/sources/lm_deploy/quick_start.html new file mode 100644 index 0000000..8ac4d4a --- /dev/null +++ b/sources/lm_deploy/quick_start.html @@ -0,0 +1,277 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+

我们基于 LMDeploy 的 PytorchEngine,增加了华为昇腾设备(Atlas 800T A2)的支持。所以,在华为昇腾上使用 LMDeploy 的方法与在英伟达 GPU 上使用 PytorchEngine 后端的方法几乎相同。在阅读本教程之前,请先阅读原版的 快速开始

+
+

安装

+

我们强烈建议用户构建一个 Docker 镜像以简化环境设置。 +克隆 lmdeploy 的源代码,Dockerfile 位于 docker 目录中。

+
1git clone https://github.com/InternLM/lmdeploy.git
+2cd lmdeploy
+
+
+
+
+

环境准备

+

Docker 版本应不低于 18.03。并且需按照 官方指南 安装 Ascend Docker Runtime。

+
+

备注

+

如果在后续容器内出现 libascend_hal.so: cannot open shared object file 错误,说明 Ascend Docker Runtime 没有被正确安装。

+
+
+

Drivers,Firmware 和 CANN

+

目标机器需安装华为驱动程序和固件版本至少为 23.0.3,请参考 +CANN 驱动程序和固件安装 +和 下载资源

+

另外,docker/Dockerfile_aarch64_ascend 没有提供CANN 安装包,用户需要自己从 昇腾资源下载中心 下载 CANN(version 8.0.RC2.beta1)软件包。 +并将 Ascend-cann-kernels-910b*.runAscend-cann-nnal_*.runAscend-cann-toolkit*.run 放在 lmdeploy 源码根目录下。

+
+
+

构建镜像

+

请在 lmdeploy 源代码根目录下执行以下镜像构建命令,CANN 相关的安装包也放在此目录下。

+
1DOCKER_BUILDKIT=1 docker build -t lmdeploy-aarch64-ascend:latest \
+2-f docker/Dockerfile_aarch64_ascend .
+
+
+

如果以下命令执行没有任何错误,这表明环境设置成功。

+
1docker run -e ASCEND_VISIBLE_DEVICES=0 --rm --name lmdeploy -t lmdeploy-aarch64-ascend:latest lmdeploy check_env
+
+
+

关于在昇腾设备上运行 docker run 命令的详情,请参考这篇 文档

+
+
+
+

离线批处理

+
+

备注

+

图模式已经支持了 Atlas 800T A2。目前,单卡下的 LLaMa3-8B/LLaMa2-7B/Qwen2-7B 已经通过测试。用户可以设定 eager_mode=False 来开启图模式,或者设定 eager_mode=True 来关闭图模式。(启动图模式需要事先 source /usr/local/Ascend/nnal/atb/set_env.sh)

+
+
+

LLM 推理

+

device_type="ascend" 加入 PytorchEngineConfig 的参数中。

+
1from lmdeploy import pipeline
+2from lmdeploy import PytorchEngineConfig
+3if __name__ == "__main__":
+4    pipe = pipeline("internlm/internlm2_5-7b-chat",
+5                    backend_config=PytorchEngineConfig(tp=1, device_type="ascend", eager_mode=True))
+6    question = ["Shanghai is", "Please introduce China", "How are you?"]
+7    response = pipe(question)
+8    print(response)
+
+
+
+
+

VLM 推理

+

device_type="ascend" 加入 PytorchEngineConfig 的参数中。

+
1from lmdeploy import pipeline, PytorchEngineConfig
+2from lmdeploy.vl import load_image
+3if __name__ == "__main__":
+4    pipe = pipeline('OpenGVLab/InternVL2-2B',
+5                    backend_config=PytorchEngineConfig(tp=1, device_type='ascend', eager_mode=True))
+6    image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
+7    response = pipe(('describe this image', image))
+8    print(response)
+
+
+
+
+
+

在线服务

+
+

备注

+

图模式已经支持 Atlas 800T A2。目前,单卡下的 InternLM2-7B/LLaMa2-7B/Qwen2-7B 已经通过测试。 +在线服务时,图模式默认开启,用户可以添加 --eager-mode 来关闭图模式。(启动图模式需要事先 source /usr/local/Ascend/nnal/atb/set_env.sh )

+
+
+

LLM 模型服务

+

--device ascend 加入到服务启动命令中。

+
1lmdeploy serve api_server --backend pytorch --device ascend --eager-mode internlm/internlm2_5-7b-chat
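The server exposes an OpenAI-compatible API, so it can be called from Python once it is up. A minimal sketch; the port (23333 by default, change with --server-port) and the served model name are assumptions about your deployment:

from openai import OpenAI

client = OpenAI(api_key="none", base_url="http://127.0.0.1:23333/v1")

# Ask the server which model it is serving instead of hard-coding the name.
model_name = client.models.list().data[0].id

response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Hello, what is an Ascend NPU?"}],
)
print(response.choices[0].message.content)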
+
+
+
+
+

VLM 模型服务

+

--device ascend 加入到服务启动命令中。

+
1lmdeploy serve api_server --backend pytorch --device ascend --eager-mode OpenGVLab/InternVL2-2B
+
+
+
+
+
+

使用命令行与LLM模型对话

+

--device ascend 加入到服务启动命令中。

+
1lmdeploy chat internlm/internlm2_5-7b-chat --backend pytorch --device ascend --eager-mode
+
+
+

也可以运行以下命令使启动容器后开启 lmdeploy 聊天

+
1docker exec -it lmdeploy_ascend_demo \
+2bash -i -c "lmdeploy chat --backend pytorch --device ascend --eager-mode internlm/internlm2_5-7b-chat"
+
+
+
+
+

量化

+

运行下面的代码可以在 Atlas 800T A2 上对权重进行 W4A16 量化。

+
1lmdeploy lite auto_awq $HF_MODEL --work-dir $WORK_DIR --device npu
+
+
+

支持的模型列表请参考 支持的模型

+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/lm_evaluation/index.html b/sources/lm_evaluation/index.html new file mode 100644 index 0000000..d95affe --- /dev/null +++ b/sources/lm_evaluation/index.html @@ -0,0 +1,153 @@ + + + + + + + + + LM-Evalution-Harness — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/lm_evaluation/install.html b/sources/lm_evaluation/install.html new file mode 100644 index 0000000..7be464e --- /dev/null +++ b/sources/lm_evaluation/install.html @@ -0,0 +1,203 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

This tutorial is intended for developers using lm-evaluation-harness with Ascend and walks through installing lm-evaluation-harness in an Ascend environment.

+
+

备注

+

请确保已经根据 快速安装昇腾环境 指引安装了对应的CANN-toolkit版本以及相应的固件和驱动,并应用了CANN-toolkit环境变量。

+
+
+

警告

+

The minimum CANN version supported by lm-evaluation-harness is 8.0.RC1. When installing CANN, also install the Kernel operator package.

+
+
+

lm-evaluation-harness安装

+

注意:lm-evaluation-harness从0.4.3开始原生支持昇腾。

+
    +
  • Option 1: Use the latest stable release

  • +
+
1pip install --upgrade-strategy=conservative lm-eval
+
+
+
    +
  • Option 2: Use the latest main branch under development

  • +
+
1pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git
+
+
+
+
+

安装校验

+

使用以下指令对lm-evaluation-harness的安装进行校验:

+
1lm-eval -h
+
+
+

如下所示,正确显示 lm-eval 命令的帮助信息即说明安装成功。

+
 1usage: lm-eval [-h] [--model MODEL] [--tasks task1,task2] [--model_args MODEL_ARGS] [--num_fewshot N]
+ 2            [--batch_size auto|auto:N|N] [--max_batch_size N] [--device DEVICE]
+ 3            [--output_path DIR|DIR/file.json] [--limit N|0<N<1] [--use_cache DIR]
+ 4            [--cache_requests {true,refresh,delete}] [--check_integrity] [--write_out] [--log_samples]
+ 5            [--system_instruction SYSTEM_INSTRUCTION] [--apply_chat_template] [--fewshot_as_multiturn]
+ 6            [--show_config] [--include_path DIR] [--gen_kwargs GEN_KWARGS]
+ 7            [--verbosity CRITICAL|ERROR|WARNING|INFO|DEBUG] [--wandb_args WANDB_ARGS]
+ 8            [--hf_hub_log_args HF_HUB_LOG_ARGS] [--predict_only] [--seed SEED] [--trust_remote_code]
+ 9
+10options:
+11-h, --help            show this help message and exit
+12--model MODEL, -m MODEL
+13                        Name of model e.g. `hf`
+14...
+
+
+
+
+

lm-evaluation-harness卸载

+
1pip uninstall lm-eval
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/lm_evaluation/quick_start.html b/sources/lm_evaluation/quick_start.html new file mode 100644 index 0000000..d58978d --- /dev/null +++ b/sources/lm_evaluation/quick_start.html @@ -0,0 +1,240 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照安装指南准备好昇腾环境及lm-evaluation-harness。

+
+
+

警告

+

Note that Ascend x lm-evaluation-harness currently supports evaluating models in transformers format only.

+
+

To evaluate a transformers-style model, for example measuring Qwen2-0.5B-Instruct on MMLU, use the following command:

+
1# 替换HF域名,方便国内用户进行数据及模型的下载
+2export HF_ENDPOINT=https://hf-mirror.com
+3
+4lm_eval --model hf \
+5    --model_args pretrained=Qwen2-0.5B-Instruct \
+6    --tasks MMLU \
+7    --device npu:0 \  # 设备类型必须指定为npu
+8    --batch_size 8
+
+
+

出现以下日志代表评估成功:

+
 1...
+ 2hf (pretrained=Qwen/Qwen2-0.5B-Instruct), gen_kwargs: (None), limit: None, num_fewshot: None, batch_size: 8
+ 3|                 Tasks                 |Version|Filter|n-shot|Metric|   |Value |   |Stderr|
+ 4|---------------------------------------|-------|------|-----:|------|---|-----:|---|-----:|
+ 5|mmlu                                   |N/A    |none  |     0|acc   |  |0.4336|±  |0.0041|
+ 6|  - abstract_algebra                   |      0|none  |     0|acc   |  |0.3300|±  |0.0473|
+ 7|  - anatomy                            |      0|none  |     0|acc   |  |0.4667|±  |0.0431|
+ 8|  - astronomy                          |      0|none  |     0|acc   |  |0.3947|±  |0.0398|
+ 9|  - business_ethics                    |      0|none  |     0|acc   |  |0.5400|±  |0.0501|
+10|  - clinical_knowledge                 |      0|none  |     0|acc   |  |0.4679|±  |0.0307|
+11|  - college_biology                    |      0|none  |     0|acc   |  |0.3819|±  |0.0406|
+12|  - college_chemistry                  |      0|none  |     0|acc   |  |0.2800|±  |0.0451|
+13|  - college_computer_science           |      0|none  |     0|acc   |  |0.3600|±  |0.0482|
+14|  - college_mathematics                |      0|none  |     0|acc   |  |0.2700|±  |0.0446|
+15|  - college_medicine                   |      0|none  |     0|acc   |  |0.4277|±  |0.0377|
+16|  - college_physics                    |      0|none  |     0|acc   |  |0.2941|±  |0.0453|
+17|  - computer_security                  |      0|none  |     0|acc   |  |0.5000|±  |0.0503|
+18|  - conceptual_physics                 |      0|none  |     0|acc   |  |0.3532|±  |0.0312|
+19|  - econometrics                       |      0|none  |     0|acc   |  |0.3158|±  |0.0437|
+20|  - electrical_engineering             |      0|none  |     0|acc   |  |0.4897|±  |0.0417|
+21|  - elementary_mathematics             |      0|none  |     0|acc   |  |0.3519|±  |0.0246|
+22|  - formal_logic                       |      0|none  |     0|acc   |  |0.2857|±  |0.0404|
+23|  - global_facts                       |      0|none  |     0|acc   |  |0.2800|±  |0.0451|
+24|  - high_school_biology                |      0|none  |     0|acc   |  |0.4806|±  |0.0284|
+25|  - high_school_chemistry              |      0|none  |     0|acc   |  |0.3892|±  |0.0343|
+26|  - high_school_computer_science       |      0|none  |     0|acc   |  |0.4700|±  |0.0502|
+27|  - high_school_european_history       |      0|none  |     0|acc   |  |0.5697|±  |0.0387|
+28|  - high_school_geography              |      0|none  |     0|acc   |  |0.5101|±  |0.0356|
+29|  - high_school_government_and_politics|      0|none  |     0|acc   |  |0.4922|±  |0.0361|
+30|  - high_school_macroeconomics         |      0|none  |     0|acc   |  |0.4231|±  |0.0250|
+31|  - high_school_mathematics            |      0|none  |     0|acc   |  |0.2963|±  |0.0278|
+32|  - high_school_microeconomics         |      0|none  |     0|acc   |  |0.5000|±  |0.0325|
+33|  - high_school_physics                |      0|none  |     0|acc   |  |0.2185|±  |0.0337|
+34|  - high_school_psychology             |      0|none  |     0|acc   |  |0.5725|±  |0.0212|
+35|  - high_school_statistics             |      0|none  |     0|acc   |  |0.3333|±  |0.0321|
+36|  - high_school_us_history             |      0|none  |     0|acc   |  |0.5049|±  |0.0351|
+37|  - high_school_world_history          |      0|none  |     0|acc   |  |0.5823|±  |0.0321|
+38|  - human_aging                        |      0|none  |     0|acc   |  |0.4574|±  |0.0334|
+39|  - human_sexuality                    |      0|none  |     0|acc   |  |0.5115|±  |0.0438|
+40| - humanities                          |N/A    |none  |     0|acc   |  |0.4064|±  |0.0070|
+41|  - international_law                  |      0|none  |     0|acc   |  |0.6694|±  |0.0429|
+42|  - jurisprudence                      |      0|none  |     0|acc   |  |0.5185|±  |0.0483|
+43|  - logical_fallacies                  |      0|none  |     0|acc   |  |0.4724|±  |0.0392|
+44|  - machine_learning                   |      0|none  |     0|acc   |  |0.3036|±  |0.0436|
+45|  - management                         |      0|none  |     0|acc   |  |0.6214|±  |0.0480|
+46|  - marketing                          |      0|none  |     0|acc   |  |0.6624|±  |0.0310|
+47|  - medical_genetics                   |      0|none  |     0|acc   |  |0.4300|±  |0.0498|
+48|  - miscellaneous                      |      0|none  |     0|acc   |  |0.5160|±  |0.0179|
+49|  - moral_disputes                     |      0|none  |     0|acc   |  |0.5376|±  |0.0268|
+50|  - moral_scenarios                    |      0|none  |     0|acc   |  |0.2425|±  |0.0143|
+51|  - nutrition                          |      0|none  |     0|acc   |  |0.5327|±  |0.0286|
+52| - other                               |N/A    |none  |     0|acc   |  |0.4796|±  |0.0088|
+53|  - philosophy                         |      0|none  |     0|acc   |  |0.4759|±  |0.0284|
+54|  - prehistory                         |      0|none  |     0|acc   |  |0.4444|±  |0.0276|
+55|  - professional_accounting            |      0|none  |     0|acc   |  |0.3901|±  |0.0291|
+56|  - professional_law                   |      0|none  |     0|acc   |  |0.3572|±  |0.0122|
+57|  - professional_medicine              |      0|none  |     0|acc   |  |0.3676|±  |0.0293|
+58|  - professional_psychology            |      0|none  |     0|acc   |  |0.4314|±  |0.0200|
+59|  - public_relations                   |      0|none  |     0|acc   |  |0.5000|±  |0.0479|
+60|  - security_studies                   |      0|none  |     0|acc   |  |0.4857|±  |0.0320|
+61| - social_sciences                     |N/A    |none  |     0|acc   |  |0.4953|±  |0.0089|
+62|  - sociology                          |      0|none  |     0|acc   |  |0.6119|±  |0.0345|
+63| - stem                                |N/A    |none  |     0|acc   |  |0.3689|±  |0.0085|
+64|  - us_foreign_policy                  |      0|none  |     0|acc   |  |0.6800|±  |0.0469|
+65|  - virology                           |      0|none  |     0|acc   |  |0.4157|±  |0.0384|
+66|  - world_religions                    |      0|none  |     0|acc   |  |0.4912|±  |0.0383|
+67
+68|      Groups      |Version|Filter|n-shot|Metric|   |Value |   |Stderr|
+69|------------------|-------|------|-----:|------|---|-----:|---|-----:|
+70|mmlu              |N/A    |none  |     0|acc   |  |0.4336|±  |0.0041|
+71| - humanities     |N/A    |none  |     0|acc   |  |0.4064|±  |0.0070|
+72| - other          |N/A    |none  |     0|acc   |  |0.4796|±  |0.0088|
+73| - social_sciences|N/A    |none  |     0|acc   |  |0.4953|±  |0.0089|
+74| - stem           |N/A    |none  |     0|acc   |  |0.3689|±  |0.0085|
+75...
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/onnxruntime/index.html b/sources/onnxruntime/index.html new file mode 100644 index 0000000..ace1703 --- /dev/null +++ b/sources/onnxruntime/index.html @@ -0,0 +1,157 @@ + + + + + + + + + ONNX Runtime — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/onnxruntime/install.html b/sources/onnxruntime/install.html new file mode 100644 index 0000000..0c3f43f --- /dev/null +++ b/sources/onnxruntime/install.html @@ -0,0 +1,174 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 ONNX Runtime & Ascend NPU 的开发者,帮助完成昇腾环境下 ONNX Runtime 的安装。

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境!

+
+
+

ONNX Runtime 安装

+

ONNX Runtime 目前提供了 源码编译 和 二进制包 两种安装方式,其中二进制包当前只支持Python。

+
+

从源码安装

+
1# Default path, change it if needed.
+2source /usr/local/Ascend/ascend-toolkit/set_env.sh
+3
+4./build.sh --config <Release|Debug|RelWithDebInfo> --build_shared_lib --parallel --use_cann
+
+
+
+
+

从pip安装

+
1pip3 install onnxruntime-cann
+
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/onnxruntime/quick_start.html b/sources/onnxruntime/quick_start.html new file mode 100644 index 0000000..10cb9c5 --- /dev/null +++ b/sources/onnxruntime/quick_start.html @@ -0,0 +1,237 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装指南 准备好昇腾环境及 ONNX Runtime!

+
+

本教程以一个简单的 resnet50 模型为例,讲述如何在 Ascend NPU上使用 ONNX Runtime 进行模型推理。

+
+

环境准备

+

安装本教程所依赖的额外必要库。

+
1pip install numpy Pillow onnx
+
+
+
+
+

模型准备

+

ONNX Runtime 推理需要 ONNX 格式模型作为输入,目前有以下几种主流途径获得 ONNX 模型。

+
+
  1. 从 ONNX Model Zoo 中下载模型。
  2. 从 torch、TensorFlow 等框架导出 ONNX 模型(导出示例见下文)。
  3. 使用转换工具,完成其他类型到 ONNX 模型的转换。
+
+

本教程使用的 resnet50 模型是从 ONNX Model Zoo 中直接下载的,具体的 下载链接

+
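下面给出一个借助 torch 导出 ONNX 模型的最小示意脚本(非本教程原有内容,假设已安装 torchvision 且可联网下载 resnet50 预训练权重,resnet50.onnx 为示例输出路径):

import torch
import torchvision

# 加载 torchvision 内置的 resnet50,也可替换为任意 torch.nn.Module
model = torchvision.models.resnet50(weights="IMAGENET1K_V1")
model.eval()

# 构造与模型输入一致的示例张量 (N, C, H, W)
dummy_input = torch.randn(1, 3, 224, 224)

# 导出为 ONNX 模型
torch.onnx.export(
    model,
    dummy_input,
    "resnet50.onnx",
    input_names=["input"],
    output_names=["output"],
    opset_version=13,
)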
+
+

类别标签

+

类别标签用于将输出权重转换成人类可读的类别信息,具体的 下载链接

+
+
+

模型推理

+
 1import onnxruntime as ort
+ 2import numpy as np
+ 3import onnx
+ 4from PIL import Image
+ 5
+ 6def preprocess(image_path):
+ 7    img = Image.open(image_path)
+ 8    img = img.resize((224, 224))
+ 9    img = np.array(img).astype(np.float32)
+10
+11    img = np.transpose(img, (2, 0, 1))
+12    img = img / 255.0
+13    mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
+14    std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
+15    img = (img - mean) / std
+16    img = np.expand_dims(img, axis=0)
+17    return img
+18
+19def inference(model_path, img):
+20    options = ort.SessionOptions()
+21    providers = [
+22        (
+23            "CANNExecutionProvider",
+24            {
+25                "device_id": 0,
+26                "arena_extend_strategy": "kNextPowerOfTwo",
+27                "npu_mem_limit": 2 * 1024 * 1024 * 1024,
+28                "op_select_impl_mode": "high_performance",
+29                "optypelist_for_implmode": "Gelu",
+30                "enable_cann_graph": True
+31            },
+32        ),
+33        "CPUExecutionProvider",
+34    ]
+35
+36    session = ort.InferenceSession(model_path, sess_options=options, providers=providers)
+37    input_name = session.get_inputs()[0].name
+38    output_name = session.get_outputs()[0].name
+39
+40    result = session.run([output_name], {input_name: img})
+41    return result
+42
+43def display(classes_path, result):
+44    with open(classes_path) as f:
+45        labels = [line.strip() for line in f.readlines()]
+46
+47    pred_idx = np.argmax(result)
+48    print(f'Predicted class: {labels[pred_idx]} ({result[0][0][pred_idx]:.4f})')
+49
+50if __name__ == '__main__':
+51    model_path = '~/model/resnet/resnet50.onnx'
+52    image_path = '~/model/resnet/cat.jpg'
+53    classes_path = '~/model/resnet/imagenet_classes.txt'
+54
+55    img = preprocess(image_path)
+56    result = inference(model_path, img)
+57    display(classes_path, result)
+
+
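在运行推理前,也可以先用以下脚本确认 CANNExecutionProvider 是否可用(示意脚本,非本教程原有内容):

import onnxruntime as ort

# 输出中包含 "CANNExecutionProvider" 即说明 onnxruntime-cann 安装正确
print(ort.get_available_providers())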
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/open_clip/index.html b/sources/open_clip/index.html new file mode 100644 index 0000000..3873206 --- /dev/null +++ b/sources/open_clip/index.html @@ -0,0 +1,160 @@ + + + + + + + + + open_clip — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/open_clip/install.html b/sources/open_clip/install.html new file mode 100644 index 0000000..80e601a --- /dev/null +++ b/sources/open_clip/install.html @@ -0,0 +1,202 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 open_clip & 昇腾的开发者,帮助完成昇腾环境下 open_clip 的安装。

+
+

昇腾环境安装

+

请根据已有昇腾产品型号及CPU架构等按照 快速安装昇腾环境指引 进行昇腾环境安装。

+
+

警告

+

CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

+
+
+
+

Python 环境创建

+
1# 创建 python 3.10 的虚拟环境
+2conda create -y -n openclip python=3.10
+3# 激活虚拟环境
+4conda activate openclip
+
+
+
+
+

open_clip 安装

+

使用以下指令安装 open_clip:

+
1pip install open-clip-torch -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+
+

torch-npu 安装

+

按照 torch-npu 安装指引 安装 2.2.0 版本 torch 和 torch-npu,或使用以下指令快速安装:

+
1# install the dependencies
+2pip3 install attrs numpy==1.26.4 decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions -i https://pypi.tuna.tsinghua.edu.cn/simple
+3# install torch and torch-npu
+4pip install torch==2.2.0 torch-npu==2.2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+
+

安装校验

+

使用以下 Python 脚本对 open_clip 的安装进行校验,正确打印 open_clip 的版本号和 NPU 卡号说明安装成功。

+
1import torch
+2import torch_npu
+3import open_clip
+4
+5print("open_cliop version: ", clip.version.__version__)
+6print("NPU devices: ", torch.npu.current_device())
+
+
+

正确回显如下(单卡 NPU 环境):

+
open_clip version: 2.24.0
+NPU devices: 0
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/open_clip/quick_start.html b/sources/open_clip/quick_start.html new file mode 100644 index 0000000..6d8ae2a --- /dev/null +++ b/sources/open_clip/quick_start.html @@ -0,0 +1,297 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 open_clip !

+
+

本文档帮助昇腾开发者快速使用 open_clip × 昇腾 进行训练和推理。

+
+

使用 NPU 的训练

+

首先在 src/training/main.py 脚本导入 torch 后,导入 torch-npu,并将 cuda 对应的 GradScaler 替换为 npu 的:

+
1import torch
+2import torch_npu
+3from torch.npu.amp import GradScaler
+
+
+
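替换完成后,训练循环中 GradScaler 的用法与 CUDA 场景一致。下面是一个混合精度训练步骤的最小示意(非 open_clip 源码,model、optimizer、loss_fn、dataloader 等名称均为假设,仅说明 scaler 的调用顺序):

import torch
import torch_npu
from torch.npu.amp import GradScaler

scaler = GradScaler()

for images, texts in dataloader:
    images, texts = images.to("npu"), texts.to("npu")
    optimizer.zero_grad()
    # 前向计算放在 npu 的 autocast 上下文中
    with torch.npu.amp.autocast():
        loss = loss_fn(model(images, texts))
    # 先缩放 loss 再反向传播,最后由 scaler 完成参数更新
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()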

MS_COCO_2017_URL_TEXT 数据集的训练为例,使用在 DataComp 数据集训练过的 CLIP-ViT-B-32 模型权重为预训练权重,使用以下脚本启动单卡/多卡 NPU 上的训练:

+
+

单卡训练

+
+

备注

+

请根据实际情况指定数据集路径 train-data、val-data、imagenet-val 和预训练模型路径 pretrained

+
+
 1python -m training.main \
+ 2    --model ViT-B-32 \
+ 3    --save-frequency 1 \
+ 4    --zeroshot-frequency 1 \
+ 5    --report-to tensorboard \
+ 6    --train-data="./data/MS_COCO_2017_URL_TEXT/traincoco.csv" \
+ 7    --val-data="./data/MS_COCO_2017_URL_TEXT/traincoco.csv" \
+ 8    --imagenet-val="./data/ImageNet-1000/val/" \
+ 9    --pretrained "./models/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K/open_clip_pytorch_model.bin" \
+10    --warmup 10000 \
+11    --batch-size=128 \
+12    --lr=1e-3 \
+13    --wd=0.1 \
+14    --epochs=8 \
+15    --workers=8 \
+16    --seed 0
+
+
+
+
+

分布式训练

+

使用 torchrun 启动 NPU 分布式训练,需指定通信后端为 hccl(--dist-backend="hccl"):

+
+

备注

+

请根据实际情况指定数据集路径 train-data、val-data、imagenet-val 和预训练模型路径 pretrained

+

nproc_per_node 需指定为每个节点卡的数量,为 torchrun 所需参数,更多 torchrun 相关参数详细含义可参考 PyTorch 官方文档

+
+
 1# train on multi-npu
+ 2torchrun --nproc_per_node 2 -m training.main \
+ 3    --save-frequency 1 \
+ 4    --zeroshot-frequency 1 \
+ 5    --report-to tensorboard \
+ 6    --train-data="./data/MS_COCO_2017_URL_TEXT/traincoco.csv" \
+ 7    --val-data="./data/MS_COCO_2017_URL_TEXT/traincoco.csv" \
+ 8    --imagenet-val="./data/ImageNet-1000/val/" \
+ 9    --pretrained "./models/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K/open_clip_pytorch_model.bin" \
+10    --warmup 10000 \
+11    --batch-size=64 \
+12    --lr=1e-3 \
+13    --wd=0.1 \
+14    --epochs=1 \
+15    --workers=8 \
+16    --seed 0 \
+17    --model ViT-B-32 \
+18    --dist-backend="hccl"
+
+
+
+
+
+

使用 NPU 的推理

+

一般而言,自定义脚本中使用 open_clip 在昇腾上训练,需要导入 torch-npu,并将数据和模型放到 NPU 上,如下样例所示:

+
+

备注

+

请根据实际情况替换模型缓存路径 /path/to/modelsViT-B-32/、模型权重路径 /path/to/models/ViT-B-32/ViT-B-32.pt 和输入图像路径 /path/to/your/image.jpg

+
+
 1import torch
+ 2import torch_npu
+ 3from PIL import Image
+ 4import open_clip as clip
+ 5
+ 6# 下载模型至指定缓存路径
+ 7model = clip.openai.load_openai_model('ViT-B-32', cache_dir="/path/to/modelsViT-B-32/")
+ 8
+ 9model, _, preprocess = clip.create_model_and_transforms('ViT-B-32', pretrained='/path/to/models/ViT-B-32/ViT-B-32.pt')
+10tokenizer = clip.get_tokenizer('ViT-B-32')
+11
+12# put inputs and model to npu
+13image = preprocess(Image.open("/path/to/your/image.jpg")).unsqueeze(0).to("npu")
+14text = tokenizer(["a diagram", "a dog", "a cat"]).to("npu")
+15model = model.to("npu")
+16
+17with torch.no_grad(), torch.npu.amp.autocast():
+18    image_features = model.encode_image(image)
+19    text_features = model.encode_text(text)
+20    image_features /= image_features.norm(dim=-1, keepdim=True)
+21    text_features /= text_features.norm(dim=-1, keepdim=True)
+22
+23    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
+24
+25print("Label probs:", text_probs)  # prints: [[1., 0., 0.]]
+
+
+

本示例所用输入图像:

+
+../../_images/CLIP.png + +
+

对应输出以下内容,正确预测其分类为 a dog:

+
Label probs: tensor([[0.0010, 0.9941, 0.0049]], device='npu:0')
+
+
+
+
+

模型评估

+

src/training/profiler.py 脚本导入 torch-npu,并将模型放到 NPU 上:

+
 1import argparse
+ 2
+ 3import torch
+ 4import torch_npu
+ 5
+ 6import open_clip
+ 7import pandas as pd
+ 8from torch.utils.flop_counter import FlopCounterMode
+ 9
+10... ...
+11
+12def profile_model(model_name, batch_size=1, profiler='torch'):
+13    model.eval()
+14    if torch.cuda.is_available():
+15        model = model.cuda()
+16    elif torch.npu.is_available():
+17        model = model.npu()
+
+
+

使用以下指令完成模型评估:

+
1python3 -m training.profiler --model ViT-L-14 --results-file "./logs/profiler_results.csv"
+
+
+

评估结果保存在 ./logs/profiler_results.csv 文件中:

+
model,image_size,image_width,text_width,embed_dim,mparams,image_mparams,text_mparams,gflops,image_gflops,text_gflops
+ViT-L-14,224,1024,768,768,427.62,303.97,123.65,175.33,162.03,13.3
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/opencompass/index.html b/sources/opencompass/index.html new file mode 100644 index 0000000..41f9bfc --- /dev/null +++ b/sources/opencompass/index.html @@ -0,0 +1,159 @@ + + + + + + + + + OpenCompass — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/opencompass/install.html b/sources/opencompass/install.html new file mode 100644 index 0000000..c99a476 --- /dev/null +++ b/sources/opencompass/install.html @@ -0,0 +1,213 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 OpenCompass & 昇腾的开发者,帮助完成昇腾环境下 OpenCompass 的安装。

+
+

昇腾环境安装

+

请根据已有昇腾产品型号及CPU架构等按照 快速安装昇腾环境指引 进行昇腾环境安装。

+
+

警告

+

CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

+
+
+
+

Python 环境创建

+
1# 创建 python 3.10 的虚拟环境
+2conda create -y -n opencompass python=3.10
+3# 激活虚拟环境
+4conda activate opencompass
+
+
+
+
+

OpenCompass 安装

+

使用以下指令安装 OpenCompass:

+
 1  pip install -U opencompass -i https://pypi.tuna.tsinghua.edu.cn/simple
+ 2
+ 3## Full installation (with support for more datasets)
+ 4# pip install "opencompass[full]"
+ 5
+ 6## Environment with model acceleration frameworks
+ 7## Manage different acceleration frameworks using virtual environments
+ 8## since they usually have dependency conflicts with each other.
+ 9# pip install "opencompass[lmdeploy]"
+10# pip install "opencompass[vllm]"
+11
+12## API evaluation (i.e. Openai, Qwen)
+13# pip install "opencompass[api]"
+
+
+
+
+

torch-npu 安装

+

按照 torch-npu 安装指引 安装 2.1.0 版本 torch 和 torch-npu,或使用以下指令快速安装:

+
1# install the dependencies
+2pip3 install attrs numpy==1.26.4 decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions -i https://pypi.tuna.tsinghua.edu.cn/simple
+3# install torch and torch-npu
+4pip install torch==2.1.0 torch-npu==2.1.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+
+

安装校验

+

使用以下 Python 脚本对 OpenCompass 的安装进行校验,正确打印 OpenCompass 的版本号和 NPU 卡号说明安装成功。

+
1import torch
+2import torch_npu
+3import opencompass
+4
+5print("opencompass version: ", opencompass.__version__)
+6print("NPU devices: ", torch.npu.current_device())
+
+
+

正确回显如下(单卡 NPU 环境):

+
opencompass version:  0.3.3
+NPU devices: 0
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/opencompass/quick_start.html b/sources/opencompass/quick_start.html new file mode 100644 index 0000000..9fa5e5c --- /dev/null +++ b/sources/opencompass/quick_start.html @@ -0,0 +1,308 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 OpenCompass !

+
+

本文档帮助昇腾开发者快速使用 OpenCompass × 昇腾 进行训练和推理。

+
+

概览

+

在 OpenCompass 中评估一个模型通常包括以下几个阶段:配置 -> 推理 -> 评估 -> 可视化。

+

配置:这是整个工作流的起点。您需要配置整个评估过程,选择要评估的模型和数据集。此外,还可以选择评估策略、计算后端等,并定义显示结果的方式。

+

推理与评估:在这个阶段,OpenCompass 将会开始对模型和数据集进行并行推理和评估。推理阶段主要是让模型从数据集产生输出,而评估阶段则是衡量这些输出与标准答案的匹配程度。这两个过程会被拆分为多个同时运行的“任务”以提高效率,但请注意,如果计算资源有限,这种策略可能会使评测变得更慢。如果需要了解该问题及解决方案,可以参考 +FAQ: 效率:

+

可视化:评估完成后,OpenCompass 将结果整理成易读的表格,并将其保存为 CSV 和 TXT 文件。你也可以激活飞书状态上报功能,此后可以在飞书客户端中及时获得评测状态报告。

+

接下来,我们将展示 OpenCompass 的基础用法:评估基座模型 InternLM2-1.8B 以及对话模型 InternLM2-Chat-1.8B、Qwen2-1.5B-Instruct 在 GSM8K 和 MATH 下采样数据集上的表现。它们的配置文件可以在 configs/eval_chat_demo.py 和 configs/eval_base_demo.py 中找到。

+

在运行此实验之前,请确保您已在本地安装了 opencompass && torch-npu

+

本文参考: +OpenCompass官方文档

+
+

配置评估任务

+
+

备注

+

在 OpenCompass 中,每个评估任务由待评估的模型和数据集组成。评估的入口点是 run.py。用户可以通过命令行或配置文件选择要测试的模型和数据集。

+
+

对于对话模型:

+
1python run.py \
+2--models hf_internlm2_chat_1_8b hf_qwen2_1_5b_instruct \
+3--datasets demo_gsm8k_chat_gen demo_math_chat_gen \
+4--debug
+
+
+

对于基座模型:

+
1python run.py \
+2--models hf_internlm2_1_8b hf_qwen2_1_5b \
+3--datasets demo_gsm8k_base_gen demo_math_base_gen \
+4--debug
+
+
+ + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
opencompass run.py 参数说明

命令行参数

描述

样例数值

--hf-type

HuggingFace 模型类型,可选值为 chat 或 base

chat

--hf-path

HuggingFace 模型路径

internlm/internlm2-chat-1_8b

--model-kwargs

构建模型的参数

device_map=’auto’

--tokenizer-path

HuggingFace tokenizer 路径(如果与模型路径相同,可以省略)

internlm/internlm2-chat-1_8b

--tokenizer-kwargs

构建 tokenizer 的参数

padding_side=’left’ truncation=’left’ trust_remote_code=True

--generation-kwargs

生成的参数

do_sample=True top_k=50 top_p=0.95

--max-seq-len

模型可以接受的最大序列长度

2048

--max-out-len

生成的最大 token 数

100

--min-out-len

生成的最小 token 数

1

--batch-size

批量大小

64

--hf-num-gpus

运行一个模型实例所需的 GPU 数量

1

--stop-words

停用词列表

‘<|im_end|>’ ‘<|im_start|>’

--pad-token-id

填充 token 的 ID

0

--peft-path

(例如) LoRA 模型的路径

internlm/internlm2-chat-1_8b

--peft-kwargs

(例如) 构建 LoRA 模型的参数

trust_remote_code=True

+
+
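除命令行传参外,也可以通过 Python 配置文件描述同一评估任务。下面是一个与 configs/eval_chat_demo.py 风格一致的最小示意(数据集与模型配置的具体导入路径以所安装 OpenCompass 版本的 configs 目录为准,此处路径仅为假设):

from mmengine.config import read_base

# read_base() 上下文中的相对导入指向 OpenCompass 自带的 configs 目录
with read_base():
    from .datasets.demo.demo_gsm8k_chat_gen import gsm8k_datasets
    from .datasets.demo.demo_math_chat_gen import math_datasets
    from .models.hf_internlm.hf_internlm2_chat_1_8b import models as internlm2_chat_1_8b
    from .models.qwen.hf_qwen2_1_5b_instruct import models as qwen2_1_5b_instruct

# run.py 会读取模块级的 datasets 与 models 两个列表
datasets = gsm8k_datasets + math_datasets
models = internlm2_chat_1_8b + qwen2_1_5b_instruct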
+

启动评估

+

由于 OpenCompass 默认并行启动评估过程,我们可以在第一次运行时以 --debug 模式启动评估,并检查是否存在问题。包括在前述的所有文档中,我们都使用了 --debug 开关。在 --debug 模式下,任务将按顺序执行,并实时打印输出。

+
1# 以 --debug 模式启动评估
+2python run.py configs/eval_chat_demo.py -w outputs/demo --debug
+
+
+

对话模型 ‘internlm/internlm2-chat-1_8b’ 和 ‘Qwen/Qwen2-1.5B-Instruct’ 将在首次运行期间从 HuggingFace 自动下载。 如果一切正常,您应该看到屏幕上显示 “Starting inference process”,且进度条开始前进:

+
1# 推理开始后打印的日志示例
+2[2023-07-12 18:23:55,076] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process...
+
+
+

然后,您可以按 Ctrl+C 中断程序,并以正常模式运行以下命令:

+
1# 以正常模式启动评估
+2python run.py configs/eval_chat_demo.py -w outputs/demo
+
+
+

在正常模式下,评估任务将在后台并行执行,其输出将被重定向到输出目录 outputs/demo/{TIMESTAMP}。前端的进度条只指示已完成任务的数量,而不考虑其成功或失败。任何后端任务失败都只会在终端触发警告消息。

+
+
+
+

可视化评估结果

+

评估完成后,评估结果表格将打印如下:

+
1dataset     version    metric    mode      qwen2-1.5b-instruct-hf    internlm2-chat-1.8b-hf
+2----------  ---------  --------  ------  ------------------------  ------------------------
+3demo_gsm8k  1d7fe4     accuracy  gen                        56.25                     32.81
+4demo_math   393424     accuracy  gen                        18.75                     14.06
+
+
+

所有运行输出将定向到 outputs/demo/ 目录,结构如下:

+
 1outputs/default/
+ 2├── 20200220_120000
+ 3├── 20230220_183030     # 每个实验一个文件夹
+ 4   ├── configs         # 用于记录的已转储的配置文件。如果在同一个实验文件夹中重新运行了不同的实验,可能会保留多个配置
+ 5   ├── logs            # 推理和评估阶段的日志文件
+ 6      ├── eval
+ 7      └── infer
+ 8   ├── predictions   # 每个任务的推理结果
+ 9   ├── results       # 每个任务的评估结果
+10   └── summary       # 单个实验的汇总评估结果
+11├── ...
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/opencv/index.html b/sources/opencv/index.html new file mode 100644 index 0000000..9aabae4 --- /dev/null +++ b/sources/opencv/index.html @@ -0,0 +1,156 @@ + + + + + + + + + OpenCV — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/opencv/install.html b/sources/opencv/install.html new file mode 100644 index 0000000..ee117d0 --- /dev/null +++ b/sources/opencv/install.html @@ -0,0 +1,256 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

从 OpenCV 4.9.0 版本开始,增加了图像处理相关高频接口的昇腾原生支持。本教程面向使用 OpenCV & 昇腾的开发者,帮助完成昇腾环境下 OpenCV 的安装。

+
+

昇腾环境安装

+

请根据已有昇腾产品型号及CPU架构等按照 快速安装昇腾环境指引 进行昇腾环境安装,或直接获取对应产品的昇腾环境镜像 ascendai/cann

+
+

警告

+

CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

+
+
+
+

OpenCV 安装

+

请遵循以下版本控制:

+ + + + + + + + + + + + + + + + + + + + + +

lib

最低版本

推荐版本

OpenCV

4.9.0

latest

Python

3.9

3.10

GCC

9.4.0

9.4.0

+
+

Python 环境创建

+
1# 创建名为 opencv 的 python 3.10 的虚拟环境
+2conda create -y -n opencv python=3.10
+3# 激活虚拟环境
+4conda activate opencv
+
+
+
+
+

源码编译

+
+
  1. 下载 OpenCV 和 opencv_contrib
+
+
1git clone https://github.com/opencv/opencv.git
+2
+3cd opencv
+4git clone https://github.com/opencv/opencv_contrib.git
+
+
+
+
  2. 编译带有 opencv_contrib 的 OpenCV
+
+
 1# 在 opencv 项目目录中创建并进入 build 目录
+ 2mkdir build
+ 3cd build
+ 4
+ 5# cmake & make
+ 6cmake -D CMAKE_BUILD_TYPE=RELEASE \
+ 7    -D CMAKE_INSTALL_PREFIX=$(pwd)/install \
+ 8    -D WITH_DEBUG=0 \
+ 9    -D OPENCV_EXTRA_MODULES_PATH=/path/to/opencv/opencv_contrib/modules \
+10    -D WITH_CUDA=0 \
+11    -D WITH_CANN=1 \
+12    -D PYTHON3_EXECUTABLE=/path/to/miniconda3/envs/opencv/bin/python \
+13    -D PYTHON_LIBRARY=/path/to/miniconda3/envs/opencv \
+14    -D PYTHON_INCLUDE_DIR=/path/to/miniconda3/envs/opencv/include/python3.10 \
+15    -D BUILD_opencv_wechat_qrcode=OFF \
+16    -D BUILD_opencv_xfeatures2d=OFF \
+17    -D BUILD_opencv_face=OFF \
+18    -D BUILD_opencv_dnn=OFF \
+19    -D BUILD_opencv_features2d=OFF \
+20    -D WITH_CAROTENE=OFF \
+21    -D WITH_IPP=OFF \
+22    -D BUILD_DOCS=ON \
+23    -D BUILD_EXAMPLES=ON ..
+24
+25make -j5
+
+
+

当编译出现以下关键回显信息时,说明编译成功。

+
# xxx 为 OpenCV 中某模块名称
+[100%] Built target xxx
+
+
+
+
+
+

安装校验

+

通过以下指令执行昇腾算子单元测试:

+
1cd path/to/opencv/build/bin
+2./opencv_test_cannops
+
+
+

出现以下关键回显说明安装成功:

+
[==========] 72 tests from 4 test cases ran. (40937 ms total)
+[  PASSED  ] 72 tests.
+
+
+
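如果编译时启用了 Python 绑定,也可以用下面的脚本做一个简单的 Python 侧检查(示意脚本,假设编译生成的 cv2 模块已加入 PYTHONPATH):

import cv2

print("OpenCV version:", cv2.__version__)
# 编译时开启 WITH_CANN=1 后,cv2 下会提供 cann 子模块
print("CANN module available:", hasattr(cv2, "cann"))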
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/opencv/quick_start.html b/sources/opencv/quick_start.html new file mode 100644 index 0000000..443bc1b --- /dev/null +++ b/sources/opencv/quick_start.html @@ -0,0 +1,280 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 OpenCV !

+
+

OpenCV 中昇腾算子入参列表和 cpu 及 cuda 算子保持一致,除了对昇腾必要的初始化、去初始化之外,用户无需学习 CANN API,仅需要将原来的接口添加 cann 包名(C++ 接口为使用 cann 命名空间),整体流程如下图所示:

+
+../../_images/opencv_cannop.png + +
+
+

图像处理

+

OpenCV 当前支持 20+ 昇腾算子,此处根据图像处理应用场景,选取 add、rotate、flip 算子的应用作示例代码,更多算子见 OpenCV 官方文档

+
+

使用 C++

+
+

备注

+

通过命令行传参 input 和 output 来指定输入和输出图像路径

+
+
 1// This file is part of OpenCV project.
+ 2// It is subject to the license terms in the LICENSE file found in the top-level directory
+ 3// of this distribution and at http://opencv.org/license.html.
+ 4
+ 5#include <iostream>
+ 6#include <opencv2/imgcodecs.hpp>
+ 7#include <opencv2/cann.hpp>
+ 8#include <opencv2/cann_interface.hpp>
+ 9
+10int main(int argc, char* argv[])
+11{
+12    cv::CommandLineParser parser(argc, argv,
+13    "{@input|puppy.png|path to input image}"
+14    "{@output|output.png|path to output image}"
+15    "{help||show help}");
+16    parser.about("This is a sample for image processing with Ascend NPU. \n");
+17    if (argc != 3 || parser.has("help"))
+18    {
+19        parser.printMessage();
+20        return 0;
+21    }
+22
+23    std::string imagePath = parser.get<std::string>(0);
+24    std::string outputPath = parser.get<std::string>(1);
+25
+26    // 读取输入图像
+27    cv::Mat img = cv::imread(imagePath);
+28    // 生成高斯噪声
+29    cv::Mat gaussNoise(img.rows, img.cols, img.type());
+30    cv::RNG rng;
+31    rng.fill(gaussNoise, cv::RNG::NORMAL, 0, 25);
+32
+33    // cann 初始化及指定设备
+34    cv::cann::initAcl();
+35    cv::cann::setDevice(0);
+36
+37    cv::Mat output;
+38    // 添加高斯噪声到输入图像
+39    cv::cann::add(img, gaussNoise, output);
+40    // 旋转图像 (0, 1, 2, 分别代表旋转 90°, 180°, 270°)
+41    cv::cann::rotate(output, output, 0);
+42    // 翻转图像 (0, 正数, 负数, 分别代表沿 x, y, x 和 y 轴进行翻转)
+43    cv::cann::flip(output, output, 0);
+44    // 写入输出图像
+45    cv::imwrite(outputPath, output);
+46
+47    // cann 去初始化
+48    cv::cann::resetDevice();
+49    cv::cann::finalizeAcl();
+50    return 0;
+51}
+
+
+
+
+

使用 Python

+
+

备注

+

通过命令行传参 input 和 output 来指定输入和输出图像路径

+
+
 1# This file is part of OpenCV project.
+ 2# It is subject to the license terms in the LICENSE file found in the top-level directory
+ 3# of this distribution and at http://opencv.org/license.html.
+ 4
+ 5import numpy as np
+ 6import cv2
+ 7import argparse
+ 8
+ 9parser = argparse.ArgumentParser(description='This is a sample for image processing with Ascend NPU.')
+10parser.add_argument('image', help='path to input image')
+11parser.add_argument('output', help='path to output image')
+12args = parser.parse_args()
+13
+14# 读取输入图像
+15img = cv2.imread(args.image)
+16# 生成高斯噪声
+17gaussNoise = np.random.normal(0, 25,(img.shape[0], img.shape[1], img.shape[2])).astype(img.dtype)
+18
+19# cann 初始化及指定设备
+20cv2.cann.initAcl()
+21cv2.cann.setDevice(0)
+22
+23# 添加高斯噪声到输入图像
+24output = cv2.cann.add(img, gaussNoise)
+25# 旋转图像 (0, 1, 2, 分别代表旋转 90°, 180°, 270°)
+26output = cv2.cann.rotate(output, 0)
+27# 翻转图像 (0, 正数, 负数, 分别代表沿 x, y, x 和 y 轴进行翻转)
+28output = cv2.cann.flip(output, 0)
+29# 写入输出图像
+30cv2.imwrite(args.output, output)
+31
+32# cann 去初始化
+33cv2.cann.finalizeAcl()
+
+
+
+
+

图像处理结果

+

本示例使用输入图像如图所示:

+
+../../_images/input.png + +
+

通过上述 Python 或 C++ 示例代码处理,得到的输出图像为:

+
+../../_images/result.png + +
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/pytorch/api_doc.html b/sources/pytorch/api_doc.html new file mode 100644 index 0000000..3d1f183 --- /dev/null +++ b/sources/pytorch/api_doc.html @@ -0,0 +1,782 @@ + + + + + + + + + API说明 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

API说明

+

PyTorch-NPU 除了提供了 PyTorch 官方算子实现之外,也提供了大量高性能的自定义算子,详细的算子信息以及描述如下所示:

+
+

备注

+

在运行下述示例之前,需要导入torch_npu扩展包 import torch_npu

+
+
+
+torch_npu._npu_dropout(self, p)
+

不使用种子(seed)进行dropout结果计算,与torch.dropout相似,优化NPU设备实现

+
+
参数:
+
    +
  • self (Tensor) -- 输入张量

  • +
  • p (Float) -- 丢弃概率

  • +
+
+
返回类型:
+

(Tensor, Tensor)

+
+
+
+ +

示例:

+
 1>>> input = torch.tensor([1.,2.,3.,4.]).npu()
+ 2>>> input
+ 3tensor([1., 2., 3., 4.], device='npu:0')
+ 4>>> prob = 0.3
+ 5>>> output, mask = torch_npu._npu_dropout(input, prob)
+ 6>>> output
+ 7tensor([0.0000, 2.8571, 0.0000, 0.0000], device='npu:0')
+ 8>>> mask
+ 9tensor([ 98, 255, 188, 186, 120, 157, 175, 159,  77, 223, 127,  79, 247, 151,
+10    253, 255], device='npu:0', dtype=torch.uint8)
+
+
+
+
+torch_npu.copy_memory_(dst, src, non_blocking=False) Tensor
+

从src拷贝元素到self张量,并返回self

+

约束说明:

+

copy_memory_仅支持NPU张量,copy_memory_的输入张量应具有相同的dtype和设备index

+
+
参数:
+
    +
  • dst (Tensor) -- 拷贝目标张量

  • +
  • src (Tensor) -- 拷贝源张量

  • +
  • non_blocking (Bool,Default: False) -- 如果设置为True且此拷贝位于CPU和NPU之间,则拷贝可能相对于主机异步发生,在其他情况下,此参数没有效果

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> a=torch.IntTensor([0,  0, -1]).npu()
+2>>> b=torch.IntTensor([1, 1, 1]).npu()
+3>>> a.copy_memory_(b)
+4tensor([1, 1, 1], device='npu:0', dtype=torch.int32)
+
+
+
+
+torch_npu.empty_with_format(size, dtype, layout, device, pin_memory, acl_format)
+

返回一个填充未初始化数据的张量

+
+
参数:
+
    +
  • size (ListInt) -- 定义输出张量shape的整数序列,可以是参数数量(可变值),也可以是列表或元组等集合

  • +
  • dtype (torch.dtype,Default: None) -- 返回张量所需数据类型;如果值为None,请使用全局默认值(请参见torch.set_default_tensor_type()).

  • +
  • layout (torch.layout, Default: torch.strided) -- 返回张量所需布局

  • +
  • device (torch.device, Default: None) -- 返回张量的所需设备

  • +
  • pin_memory (Bool, Default: False) -- 返回张量的所需设备

  • +
  • acl_format (Int, Default: 2) -- 返回张量所需内存格式

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> torch_npu.empty_with_format((2, 3), dtype=torch.float32, device="npu")
+2tensor([[1., 1., 1.],
+3        [1., 1., 1.]], device='npu:0')
+
+
+
+
+torch_npu.fast_gelu(self) Tensor
+

gelu的npu实现,支持FakeTensor模式

+
+
参数:
+

self (Tensor) -- 输入张量(仅支持 float16、float32)

+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
 1# Normal
+ 2>>> x = torch.rand(2).npu()
+ 3>>> x
+ 4tensor([0.5991, 0.4094], device='npu:0')
+ 5>>> torch_npu.fast_gelu(x)
+ 6tensor([0.4403, 0.2733], device='npu:0')
+ 7
+ 8# FakeTensorMode
+ 9>>> from torch._subclasses.fake_tensor import FakeTensorMode
+10>>> with FakeTensorMode():
+11...     x = torch.rand(2).npu()
+12...     torch_npu.fast_gelu(x)
+13>>> FakeTensor(..., device='npu:0', size=(2,))
+
+
+
+
+torch_npu.npu_alloc_float_status(self) Tensor
+

生成一个包含8个0的一维张量

+
+
参数:
+

self (Tensor) -- 输入张量

+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> input    = torch.randn([1,2,3]).npu()
+2>>> output = torch_npu.npu_alloc_float_status(input)
+3>>> input
+4tensor([[[ 2.2324,  0.2478, -0.1056],
+5        [ 1.1273, -0.2573,  1.0558]]], device='npu:0')
+6>>> output
+7tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='npu:0')
+
+
+
+
+torch_npu.npu_anchor_response_flags(self, featmap_size, stride, num_base_anchors) Tensor
+

在单个特征图中生成锚点的责任标志

+
+
参数:
+
    +
  • self (Tensor) -- 真值框,shape为[batch, 4]的2D张量

  • +
  • featmap_size (ListInt[2]) -- 特征图大小

  • +
  • stride (ListInt[2]) -- 当前水平的步长

  • +
  • num_base_anchors (Int) -- base anchors的数量

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> x = torch.rand(100, 4).npu()
+2>>> y = torch_npu.npu_anchor_response_flags(x, [60, 60], [2, 2], 9)
+3>>> y.shape
+4torch.Size([32400])
+
+
+
+
+torch_npu.npu_apply_adam(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, use_locking, use_nesterov, out=(var, m, v))
+

adam结果计算。

+
+
参数:
+
    +
  • beta1_power (Scalar) -- beta1的幂

  • +
  • beta2_power (Scalar) -- beta2的幂

  • +
  • lr (Scalar) -- 学习率

  • +
  • beta1 (Scalar) -- 一阶矩估计值的指数衰减率

  • +
  • beta2 (Scalar) -- 二阶矩估计值的指数衰减率

  • +
  • epsilon (Scalar) -- 添加到分母中以提高数值稳定性的项数

  • +
  • grad (Tensor) -- 梯度

  • +
  • use_locking (Bool) -- 设置为True时使用lock进行更新操作

  • +
  • use_nesterov (Bool) -- 设置为True时采用nesterov更新

  • +
  • var (Tensor) -- 待优化变量。

  • +
  • m (Tensor) -- 变量平均值。

  • +
  • v (Tensor) -- 变量方差。

  • +
+
+
+
+ +
+
+npu_batch_nms(self, scores, score_threshold, iou_threshold, max_size_per_class, max_total_size, change_coordinate_frame=False, transpose_box=False)
+
+
Module:
+

torch_npu

+
+
+

根据batch分类计算输入框评分,通过评分排序,删除评分高于阈值(iou_threshold)的框,支持多批多类处理。通过NonMaxSuppression(nms)操作可有效删除冗余的输入框,提高检测精度。NonMaxSuppression:抑制不是极大值的元素,搜索局部的极大值,常用于计算机视觉任务中的检测类模型。

+
+
参数:
+
    +
  • self (Tensor) -- 必填值,输入框的tensor,包含batch大小,数据类型Float16,输入示例:[batch_size, num_anchors, q, 4],其中q=1或q=num_classes

  • +
  • scores (Tensor) -- 必填值,输入tensor,数据类型Float16,输入示例:[batch_size, num_anchors, num_classes]

  • +
  • score_threshold (Float32) -- 必填值,指定评分过滤器的iou_threshold,用于筛选框,去除得分较低的框,数据类型Float32

  • +
  • iou_threshold (Float32) -- 必填值,指定nms的iou_threshold,用于设定阈值,去除高于阈值的的框,数据类型Float32

  • +
  • max_size_per_class (Int) -- 必填值,指定每个类别的最大可选的框数,数据类型Int

  • +
  • max_total_size (Int) -- 必填值,指定每个batch最大可选的框数,数据类型Int

  • +
  • change_coordinate_frame (Bool) -- 可选值, 是否正则化输出框坐标矩阵,数据类型Bool(默认False)

  • +
  • transpose_box (Bool) -- 可选值,确定是否在此op之前插入转置,数据类型Bool。True表示boxes使用4,N排布,False表示boxes使用N,4排布

  • +
+
+
+

输出说明:

  • nmsed_boxes (Tensor) -- shape为(batch, max_total_size, 4)的3D张量,指定每批次输出的nms框,数据类型Float16
  • nmsed_scores (Tensor) -- shape为(batch, max_total_size)的2D张量,指定每批次输出的nms分数,数据类型Float16
  • nmsed_classes (Tensor) -- shape为(batch, max_total_size)的2D张量,指定每批次输出的nms类,数据类型Float16
  • nmsed_num (Tensor) -- shape为(batch)的1D张量,指定nmsed_boxes的有效数量,数据类型Int32

+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> boxes = torch.randn(8, 2, 4, 4, dtype = torch.float32).to("npu")
+2>>> scores = torch.randn(3, 2, 4, dtype = torch.float32).to("npu")
+3>>> nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = torch_npu.npu_batch_nms(boxes, scores, 0.3, 0.5, 3, 4)
+4>>> nmsed_boxes
+5>>> nmsed_scores
+6>>> nmsed_classes
+7>>> nmsed_num
+
+
+
+
+npu_bert_apply_adam(lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay, step_size=None, adam_mode=0, *, out=(var, m, v))
+
+
Module:
+

torch_npu

+
+
+

adam结果计算

+
+
参数:
+
    +
  • var (Tensor) -- float16或float32类型张量

  • +
  • m (Tensor) -- 数据类型和shape与exp_avg相同

  • +
  • v (Tensor) -- 数据类型和shape与exp_avg相同

  • +
  • lr (Scalar) -- 数据类型与exp_avg相同

  • +
  • beta1 (Scalar) -- 数据类型与exp_avg相同

  • +
  • beta2 (Scalar) -- 数据类型与exp_avg相同

  • +
  • epsilon (Scalar) -- 数据类型与exp_avg相同

  • +
  • grad (Tensor) -- 数据类型和shape与exp_avg相同

  • +
  • max_grad_norm (Scalar) -- 数据类型与exp_avg相同

  • +
  • global_grad_norm (Scalar) -- 数据类型与exp_avg相同

  • +
  • weight_decay (Scalar) -- 数据类型与exp_avg相同

  • +
  • step_size (Tensor) -- 默认值为None - shape为(1, ),数据类型与exp_avg一致

  • +
  • adam_mode (Int) -- 选择adam模式。0表示“adam”, 1表示“mbert_adam”, 默认值为0

  • +
+
+
+
+
关键字参数:

out (Tensor,可选) - 输出张量。

+
+
+
+ +

示例:

+
 1>>> var_in = torch.rand(321538).uniform_(-32., 21.).npu()
+ 2>>> m_in = torch.zeros(321538).npu()
+ 3>>> v_in = torch.zeros(321538).npu()
+ 4>>> grad = torch.rand(321538).uniform_(-0.05, 0.03).npu()
+ 5>>> max_grad_norm = -1.
+ 6>>> beta1 = 0.9
+ 7>>> beta2 = 0.99
+ 8>>> weight_decay = 0.
+ 9>>> lr = 0.
+10>>> epsilon = 1e-06
+11>>> global_grad_norm = 0.
+12>>> var_out, m_out, v_out = torch_npu.npu_bert_apply_adam(lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay, out=(var_in, m_in, v_in))
+13>>> var_out
+14tensor([ 14.7733, -30.1218,  -1.3647,  ..., -16.6840,   7.1518,   8.4872], device='npu:0')
+
+
+
+
+torch_npu.npu_bmmV2(self, mat2, output_sizes) Tensor
+

将矩阵“a”乘以矩阵“b”,生成“a*b”。支持FakeTensor模式

+
+
参数:
+
    +
  • self (Tensor) -- 2D或更高维度矩阵张量。数据类型:float16、float32、int32。格式:[ND, NHWC, FRACTAL_NZ]

  • +
  • mat2 (Tensor) -- 2D或更高维度矩阵张量。数据类型:float16、float32、int32。格式:[ND, NHWC, FRACTAL_NZ]

  • +
  • output_sizes (ListInt[]) -- 输出的shape,用于matmul的反向传播

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> mat1 = torch.randn(10, 3, 4).npu()
+2>>> mat2 = torch.randn(10, 4, 5).npu()
+3>>> res = torch_npu.npu_bmmV2(mat1, mat2, [])
+4>>> res.shape
+5torch.Size([10, 3, 5])
+
+
+
+
+torch_npu.npu_bounding_box_decode(rois, deltas, means0, means1, means2, means3, stds0, stds1, stds2, stds3, max_shape, wh_ratio_clip) Tensor
+

根据rois和deltas生成标注框。自定义FasterRcnn算子

+
+
参数:
+
    +
  • rois (Tensor) -- 区域候选网络(RPN)生成的region of interests(ROI)。shape为(N,4)数据类型为float32或float16的2D张量。“N”表示ROI的数量, “4”表示“x0”、“x1”、“y0”和“y1”

  • +
  • deltas (Tensor) -- RPN生成的ROI和真值框之间的绝对变化。shape为(N,4)数据类型为float32或float16的2D张量。“N”表示错误数,“4”表示“dx”、“dy”、“dw”和“dh”

  • +
  • means0 (Float) -- index

  • +
  • means1 (Float) -- index

  • +
  • means2 (Float) -- index

  • +
  • means3 (Float) -- index, 默认值为[0,0,0,0], "deltas" = "deltas" x "stds" + "means"

  • +
  • stds0 (Float) -- index

  • +
  • stds1 (Float) -- index

  • +
  • stds2 (Float) -- index

  • +
  • stds3 (Float) -- index, 默认值:[1.0,1.0,1.0,1.0], deltas" = "deltas" x "stds" + "means"

  • +
  • max_shape (ListInt[2]) -- shape[h, w], 指定传输到网络的图像大小。用于确保转换后的bbox shape不超过“max_shape”

  • +
  • wh_ratio_clip (Float) -- “dw”和“dh”的值在(-wh_ratio_clip, wh_ratio_clip)范围内

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> rois = torch.tensor([[1., 2., 3., 4.], [3.,4., 5., 6.]], dtype = torch.float32).to("npu")
+2>>> deltas = torch.tensor([[5., 6., 7., 8.], [7.,8., 9., 6.]], dtype = torch.float32).to("npu")
+3>>> output = torch_npu.npu_bounding_box_decode(rois, deltas, 0, 0, 0, 0, 1, 1, 1, 1, (10, 10), 0.1)
+4>>> output
+5tensor([[2.5000, 6.5000, 9.0000, 9.0000],
+6        [9.0000, 9.0000, 9.0000, 9.0000]], device='npu:0')
+
+
+
+
+torch_npu.npu_broadcast(self, size) Tensor
+

返回self张量的新视图,其单维度扩展,结果连续。

+
+
参数:
+
    +
  • self (Tensor) -- 输入张量。

  • +
  • size (ListInt) -- 对应扩展尺寸。

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> x = torch.tensor([[1], [2], [3]]).npu()
+2>>> x.shape
+3torch.Size([3, 1])
+4>>> x.npu_broadcast(3, 4)
+5tensor([[1, 1, 1, 1],
+6       [2, 2, 2, 2],
+7       [3, 3, 3, 3]], device='npu:0')
+
+
+
+
+torch_npu.npu_ciou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=True, int mode=0, bool atan_sub_flag=False) Tensor
+

应用基于NPU的CIoU操作。在DIoU的基础上增加了penalty item,并propose CIoU。

+
+
参数:
+
    +
  • boxes1 (Tensor) -- 格式为xywh、shape为(4, n)的预测检测框。

  • +
  • boxes2 (Tensor) -- 相应的gt检测框,shape为(4, n)。

  • +
  • trans (Bool) -- 是否有偏移。

  • +
  • is_cross (Bool) -- box1和box2之间是否有交叉操作。

  • +
  • mode (Int) -- 选择CIoU的计算方式。0表示IoU,1表示IoF。

  • +
+
+
+

atan_sub_flag (Bool) -- 是否将正向的第二个值传递给反向。

+
+
返回类型:
+

Tensor

+
+
+
+
约束说明:

到目前为止,CIoU向后只支持当前版本中的trans==True、is_cross==False、mode==0('iou')。如果需要反向传播,确保参数正确。

+
+
+
+ +

示例:

+
1>>> box1 = torch.randn(4, 32).npu()
+2>>> box1.requires_grad = True
+3>>> box2 = torch.randn(4, 32).npu()
+4>>> box2.requires_grad = True
+5>>> ciou = torch_npu.contrib.function.npu_ciou(box1, box2)
+6>>> l = ciou.sum()
+7>>> l.backward()
+
+
+
+
+torch_npu.npu_clear_float_status(self) Tensor
+

在每个核中设置地址0x40000的值为0。

+
+
参数:
+

self (Tensor) -- 数据类型为float32的张量。

+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> x = torch.rand(2).npu()
+2>>> torch_npu.npu_clear_float_status(x)
+3tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='npu:0')
+
+
+
+
+torch_npu.npu_confusion_transpose(self, perm, shape, transpose_first) Tensor
+

混淆reshape和transpose运算。

+
+
参数:
+
    +
  • self (Tensor) -- 数据类型:float16、float32、int8、int16、int32、int64、uint8、uint16、uint32、uint64。

  • +
  • perm (ListInt) -- self张量的维度排列。

  • +
  • shape (ListInt) -- 输入shape。

  • +
  • transpose_first (Bool) -- 如果值为True,首先执行transpose,否则先执行reshape。

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +

示例:

+
1>>> x = torch.rand(2, 3, 4, 6).npu()
+2>>> x.shape
+3torch.Size([2, 3, 4, 6])
+4>>> y = torch_npu.npu_confusion_transpose(x, (0, 2, 1, 3), (2, 4, 18), True)
+5>>> y.shape
+6torch.Size([2, 4, 18])
+7>>> y2 = torch_npu.npu_confusion_transpose(x, (0, 2, 1), (2, 12, 6), False)
+8>>> y2.shape
+9torch.Size([2, 6, 12])
+
+
+
+
+torch_npu.npu_conv2d(input, weight, bias, stride, padding, dilation, groups) Tensor
+

在由多个输入平面组成的输入图像上应用一个2D卷积。

+
+
参数:
+
    +
  • input (Tensor) -- shape的输入张量,值为 (minibatch, in_channels, iH, iW)。

  • +
  • weight (Tensor) -- shape过滤器,值为 (out_channels, in_channels/groups, kH, kW)。

  • +
  • bias (Tensor) -- shape偏差 (out_channels)。

  • +
  • stride (ListInt) -- 卷积核步长。

  • +
  • padding (ListInt) -- 输入两侧的隐式填充。

  • +
  • dilation (ListInt) -- 内核元素间距。

  • +
  • groups (Int) -- 对输入进行分组。In_channels可被组数整除。

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +
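调用示意(非官方示例,输出 shape 按卷积尺寸公式推得,仅供参考):

>>> input = torch.randn(1, 3, 8, 8).npu()
>>> weight = torch.randn(16, 3, 3, 3).npu()
>>> bias = torch.randn(16).npu()
>>> output = torch_npu.npu_conv2d(input, weight, bias, [1, 1], [1, 1], [1, 1], 1)
>>> output.shape   # (8 + 2*1 - 3) // 1 + 1 = 8
torch.Size([1, 16, 8, 8])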
+
+torch_npu.npu_conv3d(input, weight, bias, stride, padding, dilation, groups) Tensor
+

在由多个输入平面组成的输入图像上应用一个3D卷积。

+
+
参数:
+
    +
  • input (Tensor) -- shape的输入张量,值为 (minibatch, in_channels, iT, iH, iW)。

  • +
  • weight (Tensor) -- shape过滤器,值为 (out_channels, in_channels/groups, kT, kH, kW)。

  • +
  • bias (Tensor) -- shape偏差 (out_channels)。

  • +
  • stride (ListInt) -- 卷积核步长。

  • +
  • padding (ListInt) -- 输入两侧的隐式填充。

  • +
  • dilation (ListInt) -- 内核元素间距。

  • +
  • groups (Int) -- 对输入进行分组。In_channels可被组数整除。

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +
+
+torch_npu.npu_conv_transpose2d(input, weight, bias, padding, output_padding, stride, dilation, groups) Tensor
+

在由多个输入平面组成的输入图像上应用一个2D转置卷积算子,有时这个过程也被称为“反卷积”。

+
+
参数:
+
    +
  • input (Tensor) -- shape的输入张量,值为 (minibatch, in_channels, iH, iW)。

  • +
  • weight (Tensor) -- shape过滤器,值为 (in_channels, out_channels/groups, kH, kW)。

  • +
  • bias (Tensor) -- shape偏差 (out_channels)。

  • +
  • padding (ListInt) -- (dilation * (kernel_size - 1) - padding) 用零来填充输入每个维度的两侧。

  • +
  • output_padding (ListInt) -- 添加到输出shape每个维度一侧的附加尺寸。

  • +
  • stride (ListInt) -- 卷积核步长。

  • +
  • dilation (ListInt) -- 内核元素间距。

  • +
  • groups (Int) -- 对输入进行分组。In_channels可被组数整除。

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +
+
+torch_npu.npu_convolution(input, weight, bias, stride, padding, dilation, groups) Tensor
+

在由多个输入平面组成的输入图像上应用一个2D或3D卷积。

+
+
参数:
+
    +
  • input (Tensor) -- shape的输入张量,值为 (minibatch, in_channels, iH, iW) 或 (minibatch, in_channels, iT, iH, iW)。

  • +
  • weight (Tensor) -- shape过滤器,值为 (out_channels, in_channels/groups, kH, kW) 或 (out_channels, in_channels/groups, kT, kH, kW)。

  • +
  • bias (Tensor) -- shape偏差 (out_channels)。

  • +
  • stride (ListInt) -- 卷积核步长。

  • +
  • padding (ListInt) -- 输入两侧的隐式填充。

  • +
  • dilation (ListInt) -- 内核元素间距。

  • +
  • groups (Int) -- 对输入进行分组。In_channels可被组数整除。

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +
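调用示意(非官方示例,此处以 3D 输入为例,输出 shape 按卷积尺寸公式推得,仅供参考):

>>> input = torch.randn(1, 3, 4, 8, 8).npu()
>>> weight = torch.randn(16, 3, 3, 3, 3).npu()
>>> bias = torch.randn(16).npu()
>>> output = torch_npu.npu_convolution(input, weight, bias, [1, 1, 1], [1, 1, 1], [1, 1, 1], 1)
>>> output.shape
torch.Size([1, 16, 4, 8, 8])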
+
+torch_npu.npu_convolution_transpose(input, weight, bias, padding, output_padding, stride, dilation, groups) Tensor
+

在由多个输入平面组成的输入图像上应用一个2D或3D转置卷积算子,有时这个过程也被称为“反卷积”。

+
+
参数:
+
    +
  • input (Tensor) -- shape的输入张量,值为 (minibatch, in_channels, iH, iW) 或 (minibatch, in_channels, iT, iH, iW)。

  • +
  • weight (Tensor) -- shape过滤器,值为 (in_channels, out_channels/groups, kH, kW) 或 (in_channels, out_channels/groups, kT, kH, kW)。

  • +
  • bias (Tensor) -- shape偏差 (out_channels)。

  • +
  • padding (ListInt) -- (dilation * (kernel_size - 1) - padding) 用零来填充输入每个维度的两侧。

  • +
  • output_padding (ListInt) -- 添加到输出shape每个维度一侧的附加尺寸。

  • +
  • stride (ListInt) -- 卷积核步长。

  • +
  • dilation (ListInt) -- 内核元素间距。

  • +
  • groups (Int) -- 对输入进行分组。In_channels可被组数整除。

  • +
+
+
返回类型:
+

Tensor

+
+
+
+ +
+
+torch_npu.npu_deformable_conv2d(self, weight, offset, bias, kernel_size, stride, padding, dilation=[1, 1, 1, 1], groups=1, deformable_groups=1, modulated=True)
+

使用预期输入计算变形卷积输出(deformed convolution output)。

+
+
参数:
+
    +
  • self (Tensor) -- 输入图像的4D张量。格式为“NHWC”,数据按以下顺序存储:[batch, in_height, in_width, in_channels]。

  • +
  • weight (Tensor) -- 可学习过滤器的4D张量。数据类型需与self相同。格式为“HWCN”,数据按以下顺序存储:[filter_height, filter_width, in_channels / groups, out_channels]。

  • +
  • offset (Tensor) -- x-y坐标偏移和掩码的4D张量。格式为“NHWC”,数据按以下顺序存储:[batch, out_height, out_width, deformable_groups * filter_height * filter_width * 3]。bias (Tensor,可选) - 过滤器输出附加偏置(additive bias)的1D张量,数据按[out_channels]的顺序存储。

  • +
  • kernel_size (ListInt[2]) -- 内核大小,2个整数的元组/列表。

  • +
  • stride (ListInt) -- 4个整数的列表,表示每个输入维度的滑动窗口步长。维度顺序根据self的数据格式解释。N维和C维必须设置为1。

  • +
  • padding (ListInt) -- 4个整数的列表,表示要添加到输入每侧(顶部、底部、左侧、右侧)的像素数。

  • +
  • dilation (ListInt) -- 4个整数的列表,表示输入每个维度的膨胀系数(dilation factor)。维度顺序根据self的数据格式解释。N维和C维必须设置为1。

  • +
  • groups (Int) -- int32类型单整数,表示从输入通道到输出通道的阻塞连接数。In_channels和out_channels需都可被“groups”数整除。

  • +
  • deformable_groups (Int) -- int32类型单整数,表示可变形组分区的数量。In_channels需可被“deformable_groups”数整除。

  • +
  • modulated (Bool) -- 默认值为True, 指定DeformableConv2D版本。True表示v2版本, False表示v1版本,目前仅支持v2。

  • +
+
+
返回类型:
+

(Tensor, Tensor)

+
+
+
+ +

示例:

+
1>>> x = torch.rand(16, 32, 32, 32).npu()
+2>>> weight = torch.rand(32, 32, 5, 5).npu()
+3>>> offset = torch.rand(16, 75, 32, 32).npu()
+4>>> output, _ = torch_npu.npu_deformable_conv2d(x, weight, offset, None, kernel_size=[5, 5], stride = [1, 1, 1, 1], padding = [2, 2, 2, 2])
+5>>> output.shape
+6torch.Size([16, 32, 32, 32])
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/pytorch/examples.html b/sources/pytorch/examples.html new file mode 100644 index 0000000..27aa3f8 --- /dev/null +++ b/sources/pytorch/examples.html @@ -0,0 +1,298 @@ + + + + + + + + + 功能样例 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

功能样例

+

这些示例将会帮助您快速了解如何在Ascend NPU上使用PyTorch的相关特性。

+
+

备注

+

在运行下述示例之前,需要您已经安装了PyTorch-NPU环境,有关环境安装,请参考 安装指南

+
+
+

1. 数据并行

+

PyTorch的数据并行主要分为以下几种:DP、DDP以及FSDP(HSDP变种),接下来将简单描述在Ascend NPU场景下如何实现上述数据并行。

+
+

1.1 DDP

+
 1# encoding: UTF-8
+ 2
+ 3import os
+ 4import torch
+ 5import torch.distributed as dist
+ 6import torch.multiprocessing as mp
+ 7import torch.nn as nn
+ 8import torch.optim as optim
+ 9from torch.nn.parallel import DistributedDataParallel as DDP
+10
+11# 引入torch-npu包
+12import torch_npu
+13
+14
+15class ToyModel(nn.Module):
+16    def __init__(self):
+17        super(ToyModel, self).__init__()
+18        self.net1 = nn.Linear(10, 10)
+19        self.relu = nn.ReLU()
+20        self.net2 = nn.Linear(10, 5)
+21
+22    def forward(self, x):
+23        return self.net2(self.relu(self.net1(x)))
+24
+25
+26def setup(rank, world_size):
+27    os.environ["MASTER_ADDR"] = "localhost"
+28    os.environ["MASTER_PORT"] = "29500"
+29
+30    # initialize the process group
+31    dist.init_process_group("hccl", rank=rank, world_size=world_size)
+32
+33
+34def example(rank, world_size):
+35    device = torch.device("npu:{}".format(rank))
+36    # create default process group
+37    setup(rank, world_size)
+38    # create local model
+39    model = ToyModel().to(device)
+40    # construct DDP model
+41    ddp_model = DDP(model, device_ids=[rank])
+42    # define loss function and optimizer
+43    loss_fn = nn.MSELoss()
+44    optimizer = optim.SGD(ddp_model.parameters(), lr=0.001)
+45
+46    # forward pass
+47    outputs = ddp_model(torch.randn(20, 10).to(device))
+48    # backward pass
+49    labels = torch.randn(20, 5).to(device)
+50    loss_fn(outputs, labels).backward()
+51    # update parameters
+52    optimizer.step()
+53
+54
+55def main():
+56    n_npus = torch.npu.device_count()
+57    assert n_npus >= 2, f"Requires at least 2 NPUs to run, but got {n_npus}"
+58    world_size = n_npus
+59    mp.spawn(example, args=(world_size,), nprocs=world_size, join=True)
+60
+61
+62if __name__ == "__main__":
+63    main()
+
+
+
+
+

1.2 FSDP

+
 1# encoding: UTF-8
+ 2
+ 3import os
+ 4import torch
+ 5import torch.distributed as dist
+ 6import torch.multiprocessing as mp
+ 7import torch.nn as nn
+ 8import torch.optim as optim
+ 9from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
+10
+11# 引入torch-npu包
+12import torch_npu
+13
+14
+15class ToyModel(nn.Module):
+16    def __init__(self):
+17        super(ToyModel, self).__init__()
+18        self.net1 = nn.Linear(10, 10)
+19        self.relu = nn.ReLU()
+20        self.net2 = nn.Linear(10, 5)
+21
+22    def forward(self, x):
+23        return self.net2(self.relu(self.net1(x)))
+24
+25
+26def setup(rank, world_size):
+27    os.environ["MASTER_ADDR"] = "localhost"
+28    os.environ["MASTER_PORT"] = "29500"
+29
+30    # initialize the process group
+31    dist.init_process_group("hccl", rank=rank, world_size=world_size)
+32
+33
+34def example(rank, world_size):
+35    device = torch.device("npu:{}".format(rank))
+36    # create default process group
+37    setup(rank, world_size)
+38    # create local model
+39    model = ToyModel().to(device)
+40    # construct FSDP model
+41    ddp_model = FSDP(model, device_id=rank)
+42    # define loss function and optimizer
+43    loss_fn = nn.MSELoss()
+44    optimizer = optim.SGD(ddp_model.parameters(), lr=0.001)
+45
+46    # forward pass
+47    outputs = ddp_model(torch.randn(20, 10).to(device))
+48    # backward pass
+49    labels = torch.randn(20, 5).to(device)
+50    loss_fn(outputs, labels).backward()
+51    # update parameters
+52    optimizer.step()
+53
+54
+55def main():
+56    n_npus = torch.npu.device_count()
+57    assert n_npus >= 2, f"Requires at least 2 NPUs to run, but got {n_npus}"
+58    world_size = n_npus
+59    mp.spawn(example, args=(world_size,), nprocs=world_size, join=True)
+60
+61
+62if __name__ == "__main__":
+63    main()
+
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/pytorch/faq.html b/sources/pytorch/faq.html new file mode 100644 index 0000000..3fb6d13 --- /dev/null +++ b/sources/pytorch/faq.html @@ -0,0 +1,155 @@ + + + + + + + + + FAQ — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

FAQ

+
+

微信群

+

添加“开源小助手”微信,根据提示让小助手拉入群聊。

+../../_images/pytorch_wechat.jpg + +
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/pytorch/index.html b/sources/pytorch/index.html new file mode 100644 index 0000000..741c87a --- /dev/null +++ b/sources/pytorch/index.html @@ -0,0 +1,194 @@ + + + + + + + + + PyTorch — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/sources/pytorch/install.html b/sources/pytorch/install.html new file mode 100644 index 0000000..17116cd --- /dev/null +++ b/sources/pytorch/install.html @@ -0,0 +1,257 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

跟随指导,安装在NPU上运行的PyTorch版本。

+
+

1. 选择需要安装的 PyTorch 版本

+

准备安装 PyTorch:

+ +
+
+
+
(此处为文档页面中的交互式版本选择器,以下仅保留其可选项信息)
PyTorch版本: 2.3.1 / 2.2.0 / 2.1.0
CPU架构: x86-64 / aarch64
安装方式: Docker / pip / 源码构建
PyTorch-NPU版本与CANN-toolkit版本随所选 PyTorch 版本联动显示
+
+
+

2. 安装 PyTorch

+
+

警告

+

如果使用了非CANN安装时的Python环境(如Conda),请确保CANN-toolkit依赖的Python包在该环境中已经 安装

+
+
+
+

备注

+

请确保已经安装了与上述CANN-toolkit版本匹配的驱动和固件。

+
+
+

+    
+
+
+
+

备注

+

请确保已经根据上述表格建议安装了对应的CANN-toolkit版本以及相应的固件和驱动,并应用了CANN-toolkit环境变量。

+
+
+

+    
+
+
+
+
+

备注

+

请确保已经根据上述表格建议安装了对应的CANN-toolkit版本以及相应的固件和驱动,并应用了CANN-toolkit环境变量。

+
+

2.1 环境依赖

+
    +
  • Python 3.8 ~ 3.10
  • +
  • 支持C++17的编译器,例如clang 或者 gcc (9.4.0及以上)
  • +
  • Conda
  • +
+
+
+
+

备注

+

请确认CXX11_ABI是关闭的,如果无法确定,建议显式关闭:

+
+
export _GLIBCXX_USE_CXX11_ABI=0
+

2.2 构建

+
+

+            
+
+
+
+

3. 验证安装结果

+
1import torch
+2import torch_npu
+3
+4x = torch.randn(2, 2).npu()
+5y = torch.randn(2, 2).npu()
+6z = x.mm(y)
+7
+8print(z)
+
+
+

程序能够成功打印矩阵Z的值即为安装成功。

+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/pytorch/quick_start.html b/sources/pytorch/quick_start.html new file mode 100644 index 0000000..913de0d --- /dev/null +++ b/sources/pytorch/quick_start.html @@ -0,0 +1,2615 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

在运行下述示例之前,需要您已经安装了PyTorch-NPU环境,有关环境安装,请参考 安装指南

+
+

一般来说,要在代码中使用NPU进行训练推理,需要做以下更改:

+
+
  1. 导入 torch_npu 扩展包 import torch_npu
  2. 将模型以及模型输入上传到 NPU 上
+
+
1device= torch.device("npu")
+2model = model.to(device)
+3input = input.to(device)
+
+
+

下面的实例演示了如何使用NPU进行训练和推理任务:

+
+

1. 单卡训练

+

以下代码使用了cifar10数据集在NPU上训练模型(截取自 PyTorch tutorials),请关注高亮的内容。

+
  1"""
+  2Training an image classifier
+  3----------------------------
+  4
+  5We will do the following steps in order:
+  6
+  71. Load and normalize the CIFAR10 training and test datasets using
+  8``torchvision``
+  91. Define a Convolutional Neural Network
+ 102. Define a loss function
+ 113. Train the network on the training data
+ 124. Test the network on the test data
+ 13
+ 145. Load and normalize CIFAR10
+ 15^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ 16
+ 17Using ``torchvision``, it’s extremely easy to load CIFAR10.
+ 18"""
+ 19import torch
+ 20# 引入torch-npu包
+ 21import torch_npu
+ 22
+ 23# 定义device
+ 24device = torch.device('npu:0' if torch.npu.is_available() else 'cpu')
+ 25print(device)
+ 26
+ 27import torchvision
+ 28import torchvision.transforms as transforms
+ 29
+ 30########################################################################
+ 31# The output of torchvision datasets are PILImage images of range [0, 1].
+ 32# We transform them to Tensors of normalized range [-1, 1].
+ 33transform = transforms.Compose(
+ 34    [transforms.ToTensor(),
+ 35    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ 36
+ 37batch_size = 4
+ 38
+ 39trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
+ 40                                        download=True, transform=transform)
+ 41trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
+ 42                                        shuffle=True, num_workers=2)
+ 43
+ 44testset = torchvision.datasets.CIFAR10(root='./data', train=False,
+ 45                                    download=True, transform=transform)
+ 46testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
+ 47                                        shuffle=False, num_workers=2)
+ 48
+ 49classes = ('plane', 'car', 'bird', 'cat',
+ 50        'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
+ 51
+ 52########################################################################
+ 53# 2. Define a Convolutional Neural Network
+ 54# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ 55# Copy the neural network from the Neural Networks section before and modify it to
+ 56# take 3-channel images (instead of 1-channel images as it was defined).
+ 57import torch.nn as nn
+ 58import torch.nn.functional as F
+ 59
+ 60
+ 61class Net(nn.Module):
+ 62    def __init__(self):
+ 63        super().__init__()
+ 64        self.conv1 = nn.Conv2d(3, 6, 5)
+ 65        self.pool = nn.MaxPool2d(2, 2)
+ 66        self.conv2 = nn.Conv2d(6, 16, 5)
+ 67        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+ 68        self.fc2 = nn.Linear(120, 84)
+ 69        self.fc3 = nn.Linear(84, 10)
+ 70
+ 71    def forward(self, x):
+ 72        x = self.pool(F.relu(self.conv1(x)))
+ 73        x = self.pool(F.relu(self.conv2(x)))
+ 74        x = torch.flatten(x, 1) # flatten all dimensions except batch
+ 75        x = F.relu(self.fc1(x))
+ 76        x = F.relu(self.fc2(x))
+ 77        x = self.fc3(x)
+ 78        return x
+ 79
+ 80net = Net()
+ 81
+ 82# 将模型加载到NPU上
+ 83net.to(device)
+ 84
+ 85########################################################################
+ 86# 3. Define a Loss function and optimizer
+ 87# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ 88# Let's use a Classification Cross-Entropy loss and SGD with momentum.
+ 89import torch.optim as optim
+ 90
+ 91criterion = nn.CrossEntropyLoss()
+ 92optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
+ 93
+ 94########################################################################
+ 95# 4. Train the network
+ 96# ^^^^^^^^^^^^^^^^^^^^
+ 97#
+ 98# This is when things start to get interesting.
+ 99# We simply have to loop over our data iterator, and feed the inputs to the
+100# network and optimize.
+101
+102for epoch in range(2):  # loop over the dataset multiple times
+103
+104    running_loss = 0.0
+105    for i, data in enumerate(trainloader, 0):
+106        # get the inputs; data is a list of [inputs, labels]
+107        # 将input数据发送到NPU上
+108        inputs, labels = data[0].to(device), data[1].to(device)
+109
+110        # zero the parameter gradients
+111        optimizer.zero_grad()
+112
+113        # forward + backward + optimize
+114        outputs = net(inputs)
+115        loss = criterion(outputs, labels)
+116        loss.backward()
+117        optimizer.step()
+118
+119        # print statistics
+120        running_loss += loss.item()
+121        if i % 2000 == 1999:    # print every 2000 mini-batches
+122            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
+123            running_loss = 0.0
+124
+125print('Finished Training')
+126
+127########################################################################
+128# 5. Test the network on the test data
+129# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+130#
+131# We have trained the network for 2 passes over the training dataset.
+132# But we need to check if the network has learnt anything at all.
+133#
+134# We will check this by predicting the class label that the neural network
+135# outputs, and checking it against the ground-truth. If the prediction is
+136# correct, we add the sample to the list of correct predictions.
+137#
+138# Let us look at how the network performs on the whole dataset.
+139correct = 0
+140total = 0
+141# since we're not training, we don't need to calculate the gradients for our outputs
+142with torch.no_grad():
+143    for data in testloader:
+144        # 将input数据发送到NPU上
+145        images, labels = data[0].to(device), data[1].to(device)
+146        # calculate outputs by running images through the network
+147        outputs = net(images)
+148        # the class with the highest energy is what we choose as prediction
+149        _, predicted = torch.max(outputs.data, 1)
+150        total += labels.size(0)
+151        correct += (predicted == labels).sum().item()
+152
+153print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
+154########################################################################
+155# That looks way better than chance, which is 10% accuracy (randomly picking
+156# a class out of 10 classes).
+157# Seems like the network learnt something.
+158#
+159# Hmmm, what are the classes that performed well, and the classes that did
+160# not perform well:
+161
+162# prepare to count predictions for each class
+163correct_pred = {classname: 0 for classname in classes}
+164total_pred = {classname: 0 for classname in classes}
+165
+166# again no gradients needed
+167with torch.no_grad():
+168    for data in testloader:
+169        # 将input数据发送到NPU上
+170        images, labels = data[0].to(device), data[1].to(device)
+171        outputs = net(images)
+172        _, predictions = torch.max(outputs, 1)
+173        # collect the correct predictions for each class
+174        for label, prediction in zip(labels, predictions):
+175            if label == prediction:
+176                correct_pred[classes[label]] += 1
+177            total_pred[classes[label]] += 1
+178
+179
+180# print accuracy for each class
+181for classname, correct_count in correct_pred.items():
+182    accuracy = 100 * float(correct_count) / total_pred[classname]
+183    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
+
+
+
+
+

2. 使用DeepSpeed多卡并行训练

+

以下代码使用了cifar10数据集,使用DeepSpeed训练模型在多张NPU卡上进行模型训练(来自 DeepSpeed Examples),自DeepSpeed v0.12.6之后,代码无需任何修改,即可自动检测NPU并进行训练。

+
  1import argparse
+  2import os
+  3
+  4import deepspeed
+  5import torch
+  6import torch.nn as nn
+  7import torch.nn.functional as F
+  8import torchvision
+  9import torchvision.transforms as transforms
+ 10from deepspeed.accelerator import get_accelerator
+ 11from deepspeed.moe.utils import split_params_into_different_moe_groups_for_optimizer
+ 12
+ 13
+ 14def add_argument():
+ 15    parser = argparse.ArgumentParser(description="CIFAR")
+ 16
+ 17    # For train.
+ 18    parser.add_argument(
+ 19        "-e",
+ 20        "--epochs",
+ 21        default=30,
+ 22        type=int,
+ 23        help="number of total epochs (default: 30)",
+ 24    )
+ 25    parser.add_argument(
+ 26        "--local_rank",
+ 27        type=int,
+ 28        default=-1,
+ 29        help="local rank passed from distributed launcher",
+ 30    )
+ 31    parser.add_argument(
+ 32        "--log-interval",
+ 33        type=int,
+ 34        default=2000,
+ 35        help="output logging information at a given interval",
+ 36    )
+ 37
+ 38    # For mixed precision training.
+ 39    parser.add_argument(
+ 40        "--dtype",
+ 41        default="fp16",
+ 42        type=str,
+ 43        choices=["bf16", "fp16", "fp32"],
+ 44        help="Datatype used for training",
+ 45    )
+ 46
+ 47    # For ZeRO Optimization.
+ 48    parser.add_argument(
+ 49        "--stage",
+ 50        default=0,
+ 51        type=int,
+ 52        choices=[0, 1, 2, 3],
+ 53        help="ZeRO optimization stage",
+ 54    )
+ 55
+ 56    # For MoE (Mixture of Experts).
+ 57    parser.add_argument(
+ 58        "--moe",
+ 59        default=False,
+ 60        action="store_true",
+ 61        help="use deepspeed mixture of experts (moe)",
+ 62    )
+ 63    parser.add_argument(
+ 64        "--ep-world-size", default=1, type=int, help="(moe) expert parallel world size"
+ 65    )
+ 66    parser.add_argument(
+ 67        "--num-experts",
+ 68        type=int,
+ 69        nargs="+",
+ 70        default=[
+ 71            1,
+ 72        ],
+ 73        help="number of experts list, MoE related.",
+ 74    )
+ 75    parser.add_argument(
+ 76        "--mlp-type",
+ 77        type=str,
+ 78        default="standard",
+ 79        help="Only applicable when num-experts > 1, accepts [standard, residual]",
+ 80    )
+ 81    parser.add_argument(
+ 82        "--top-k", default=1, type=int, help="(moe) gating top 1 and 2 supported"
+ 83    )
+ 84    parser.add_argument(
+ 85        "--min-capacity",
+ 86        default=0,
+ 87        type=int,
+ 88        help="(moe) minimum capacity of an expert regardless of the capacity_factor",
+ 89    )
+ 90    parser.add_argument(
+ 91        "--noisy-gate-policy",
+ 92        default=None,
+ 93        type=str,
+ 94        help="(moe) noisy gating (only supported with top-1). Valid values are None, RSample, and Jitter",
+ 95    )
+ 96    parser.add_argument(
+ 97        "--moe-param-group",
+ 98        default=False,
+ 99        action="store_true",
+100        help="(moe) create separate moe param groups, required when using ZeRO w. MoE",
+101    )
+102
+103    # Include DeepSpeed configuration arguments.
+104    parser = deepspeed.add_config_arguments(parser)
+105
+106    args = parser.parse_args()
+107
+108    return args
+109
+110
+111def create_moe_param_groups(model):
+112    """Create separate parameter groups for each expert."""
+113    parameters = {"params": [p for p in model.parameters()], "name": "parameters"}
+114    return split_params_into_different_moe_groups_for_optimizer(parameters)
+115
+116
+117def get_ds_config(args):
+118    """Get the DeepSpeed configuration dictionary."""
+119    ds_config = {
+120        "train_batch_size": 16,
+121        "steps_per_print": 2000,
+122        "optimizer": {
+123            "type": "Adam",
+124            "params": {
+125                "lr": 0.001,
+126                "betas": [0.8, 0.999],
+127                "eps": 1e-8,
+128                "weight_decay": 3e-7,
+129            },
+130        },
+131        "scheduler": {
+132            "type": "WarmupLR",
+133            "params": {
+134                "warmup_min_lr": 0,
+135                "warmup_max_lr": 0.001,
+136                "warmup_num_steps": 1000,
+137            },
+138        },
+139        "gradient_clipping": 1.0,
+140        "prescale_gradients": False,
+141        "bf16": {"enabled": args.dtype == "bf16"},
+142        "fp16": {
+143            "enabled": args.dtype == "fp16",
+144            "fp16_master_weights_and_grads": False,
+145            "loss_scale": 0,
+146            "loss_scale_window": 500,
+147            "hysteresis": 2,
+148            "min_loss_scale": 1,
+149            "initial_scale_power": 15,
+150        },
+151        "wall_clock_breakdown": False,
+152        "zero_optimization": {
+153            "stage": args.stage,
+154            "allgather_partitions": True,
+155            "reduce_scatter": True,
+156            "allgather_bucket_size": 50000000,
+157            "reduce_bucket_size": 50000000,
+158            "overlap_comm": True,
+159            "contiguous_gradients": True,
+160            "cpu_offload": False,
+161        },
+162    }
+163    return ds_config
+164
+165
+166class Net(nn.Module):
+167    def __init__(self, args):
+168        super(Net, self).__init__()
+169        self.conv1 = nn.Conv2d(3, 6, 5)
+170        self.pool = nn.MaxPool2d(2, 2)
+171        self.conv2 = nn.Conv2d(6, 16, 5)
+172        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+173        self.fc2 = nn.Linear(120, 84)
+174        self.moe = args.moe
+175        if self.moe:
+176            fc3 = nn.Linear(84, 84)
+177            self.moe_layer_list = []
+178            for n_e in args.num_experts:
+179                # Create moe layers based on the number of experts.
+180                self.moe_layer_list.append(
+181                    deepspeed.moe.layer.MoE(
+182                        hidden_size=84,
+183                        expert=fc3,
+184                        num_experts=n_e,
+185                        ep_size=args.ep_world_size,
+186                        use_residual=args.mlp_type == "residual",
+187                        k=args.top_k,
+188                        min_capacity=args.min_capacity,
+189                        noisy_gate_policy=args.noisy_gate_policy,
+190                    )
+191                )
+192            self.moe_layer_list = nn.ModuleList(self.moe_layer_list)
+193            self.fc4 = nn.Linear(84, 10)
+194        else:
+195            self.fc3 = nn.Linear(84, 10)
+196
+197    def forward(self, x):
+198        x = self.pool(F.relu(self.conv1(x)))
+199        x = self.pool(F.relu(self.conv2(x)))
+200        x = x.view(-1, 16 * 5 * 5)
+201        x = F.relu(self.fc1(x))
+202        x = F.relu(self.fc2(x))
+203        if self.moe:
+204            for layer in self.moe_layer_list:
+205                x, _, _ = layer(x)
+206            x = self.fc4(x)
+207        else:
+208            x = self.fc3(x)
+209        return x
+210
+211
+212def test(model_engine, testset, local_device, target_dtype, test_batch_size=4):
+213    """Test the network on the test data.
+214
+215    Args:
+216        model_engine (deepspeed.runtime.engine.DeepSpeedEngine): the DeepSpeed engine.
+217        testset (torch.utils.data.Dataset): the test dataset.
+218        local_device (str): the local device name.
+219        target_dtype (torch.dtype): the target datatype for the test data.
+220        test_batch_size (int): the test batch size.
+221
+222    """
+223    # The 10 classes for CIFAR10.
+224    classes = (
+225        "plane",
+226        "car",
+227        "bird",
+228        "cat",
+229        "deer",
+230        "dog",
+231        "frog",
+232        "horse",
+233        "ship",
+234        "truck",
+235    )
+236
+237    # Define the test dataloader.
+238    testloader = torch.utils.data.DataLoader(
+239        testset, batch_size=test_batch_size, shuffle=False, num_workers=0
+240    )
+241
+242    # For total accuracy.
+243    correct, total = 0, 0
+244    # For accuracy per class.
+245    class_correct = list(0.0 for i in range(10))
+246    class_total = list(0.0 for i in range(10))
+247
+248    # Start testing.
+249    model_engine.eval()
+250    with torch.no_grad():
+251        for data in testloader:
+252            images, labels = data
+253            if target_dtype is not None:
+254                images = images.to(target_dtype)
+255            outputs = model_engine(images.to(local_device))
+256            _, predicted = torch.max(outputs.data, 1)
+257            # Count the total accuracy.
+258            total += labels.size(0)
+259            correct += (predicted == labels.to(local_device)).sum().item()
+260
+261            # Count the accuracy per class.
+262            batch_correct = (predicted == labels.to(local_device)).squeeze()
+263            for i in range(test_batch_size):
+264                label = labels[i]
+265                class_correct[label] += batch_correct[i].item()
+266                class_total[label] += 1
+267
+268    if model_engine.local_rank == 0:
+269        print(
+270            f"Accuracy of the network on the {total} test images: {100 * correct / total : .0f} %"
+271        )
+272
+273        # For all classes, print the accuracy.
+274        for i in range(10):
+275            print(
+276                f"Accuracy of {classes[i] : >5s} : {100 * class_correct[i] / class_total[i] : 2.0f} %"
+277            )
+278
+279
+280def main(args):
+281    # Initialize DeepSpeed distributed backend.
+282    deepspeed.init_distributed()
+283    _local_rank = int(os.environ.get("LOCAL_RANK"))
+284    get_accelerator().set_device(_local_rank)
+285
+286    ########################################################################
+287    # Step1. Data Preparation.
+288    #
+289    # The output of torchvision datasets are PILImage images of range [0, 1].
+290    # We transform them to Tensors of normalized range [-1, 1].
+291    #
+292    # Note:
+293    #     If running on Windows and you get a BrokenPipeError, try setting
+294    #     the num_worker of torch.utils.data.DataLoader() to 0.
+295    ########################################################################
+296    transform = transforms.Compose(
+297        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+298    )
+299
+300    if torch.distributed.get_rank() != 0:
+301        # Might be downloading cifar data, let rank 0 download first.
+302        torch.distributed.barrier()
+303
+304    # Load or download cifar data.
+305    trainset = torchvision.datasets.CIFAR10(
+306        root="./data", train=True, download=True, transform=transform
+307    )
+308    testset = torchvision.datasets.CIFAR10(
+309        root="./data", train=False, download=True, transform=transform
+310    )
+311
+312    if torch.distributed.get_rank() == 0:
+313        # Cifar data is downloaded, indicate other ranks can proceed.
+314        torch.distributed.barrier()
+315
+316    ########################################################################
+317    # Step 2. Define the network with DeepSpeed.
+318    #
+319    # First, we define a Convolution Neural Network.
+320    # Then, we define the DeepSpeed configuration dictionary and use it to
+321    # initialize the DeepSpeed engine.
+322    ########################################################################
+323    net = Net(args)
+324
+325    # Get list of parameters that require gradients.
+326    parameters = filter(lambda p: p.requires_grad, net.parameters())
+327
+328    # If using MoE, create separate param groups for each expert.
+329    if args.moe_param_group:
+330        parameters = create_moe_param_groups(net)
+331
+332    # Initialize DeepSpeed to use the following features.
+333    #   1) Distributed model.
+334    #   2) Distributed data loader.
+335    #   3) DeepSpeed optimizer.
+336    ds_config = get_ds_config(args)
+337    model_engine, optimizer, trainloader, __ = deepspeed.initialize(
+338        args=args,
+339        model=net,
+340        model_parameters=parameters,
+341        training_data=trainset,
+342        config=ds_config,
+343    )
+344
+345    # Get the local device name (str) and local rank (int).
+346    local_device = get_accelerator().device_name(model_engine.local_rank)
+347    local_rank = model_engine.local_rank
+348
+349    # For float32, target_dtype will be None so no datatype conversion needed.
+350    target_dtype = None
+351    if model_engine.bfloat16_enabled():
+352        target_dtype = torch.bfloat16
+353    elif model_engine.fp16_enabled():
+354        target_dtype = torch.half
+355
+356    # Define the Classification Cross-Entropy loss function.
+357    criterion = nn.CrossEntropyLoss()
+358
+359    ########################################################################
+360    # Step 3. Train the network.
+361    #
+362    # This is when things start to get interesting.
+363    # We simply have to loop over our data iterator, and feed the inputs to the
+364    # network and optimize. (DeepSpeed handles the distributed details for us!)
+365    ########################################################################
+366
+367    for epoch in range(args.epochs):  # loop over the dataset multiple times
+368        running_loss = 0.0
+369        for i, data in enumerate(trainloader):
+370            # Get the inputs. ``data`` is a list of [inputs, labels].
+371            inputs, labels = data[0].to(local_device), data[1].to(local_device)
+372
+373            # Try to convert to target_dtype if needed.
+374            if target_dtype is not None:
+375                inputs = inputs.to(target_dtype)
+376
+377            outputs = model_engine(inputs)
+378            loss = criterion(outputs, labels)
+379
+380            model_engine.backward(loss)
+381            model_engine.step()
+382
+383            # Print statistics
+384            running_loss += loss.item()
+385            if local_rank == 0 and i % args.log_interval == (
+386                args.log_interval - 1
+387            ):  # Print every log_interval mini-batches.
+388                print(
+389                    f"[{epoch + 1 : d}, {i + 1 : 5d}] loss: {running_loss / args.log_interval : .3f}"
+390                )
+391                running_loss = 0.0
+392    print("Finished Training")
+393
+394    ########################################################################
+395    # Step 4. Test the network on the test data.
+396    ########################################################################
+397    test(model_engine, testset, local_device, target_dtype)
+398
+399
+400if __name__ == "__main__":
+401    args = add_argument()
+402    main(args)
+
+
+
+
+
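The script is normally started with the `deepspeed` launcher rather than plain `python`, so that one process per NPU card is spawned. Before launching, you can confirm that DeepSpeed has picked up the NPU accelerator. A minimal sketch (an assumption for illustration; it presumes DeepSpeed >= 0.12.6 and `torch_npu` are installed):

import torch
import torch_npu  # Ascend NPU plugin for PyTorch
from deepspeed.accelerator import get_accelerator

accelerator = get_accelerator()
print(accelerator.device_name())   # expected to report the "npu" accelerator when detection succeeds
print(accelerator.device_count())  # number of visible NPU cards

If the accelerator is reported as "npu", the training code above places the model and data on the NPU cards automatically.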

3. Fine-tuning a model with Transformers

+

The following code fine-tunes an LLM with Transformers (from the transformers examples). Starting from transformers version xxx and accelerate v0.21.0, the code automatically detects the NPU and runs the fine-tuning on it without any modification.

+
  1#!/usr/bin/env python
+  2# coding=utf-8
+  3# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
+  4#
+  5# Licensed under the Apache License, Version 2.0 (the "License");
+  6# you may not use this file except in compliance with the License.
+  7# You may obtain a copy of the License at
+  8#
+  9#     http://www.apache.org/licenses/LICENSE-2.0
+ 10#
+ 11# Unless required by applicable law or agreed to in writing, software
+ 12# distributed under the License is distributed on an "AS IS" BASIS,
+ 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ 14# See the License for the specific language governing permissions and
+ 15# limitations under the License.
+ 16"""
+ 17Fine-tuning the library models for causal language modeling (GPT, GPT-2, CTRL, ...) on a text file or a dataset.
+ 18
+ 19Here is the full list of checkpoints on the hub that can be fine-tuned by this script:
+ 20https://huggingface.co/models?filter=text-generation
+ 21"""
+ 22# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.
+ 23
+ 24import logging
+ 25import math
+ 26import os
+ 27import sys
+ 28from dataclasses import dataclass, field
+ 29from itertools import chain
+ 30from typing import Optional
+ 31
+ 32import datasets
+ 33import evaluate
+ 34import torch
+ 35from datasets import load_dataset
+ 36
+ 37import transformers
+ 38from transformers import (
+ 39    CONFIG_MAPPING,
+ 40    MODEL_FOR_CAUSAL_LM_MAPPING,
+ 41    AutoConfig,
+ 42    AutoModelForCausalLM,
+ 43    AutoTokenizer,
+ 44    HfArgumentParser,
+ 45    Trainer,
+ 46    TrainingArguments,
+ 47    default_data_collator,
+ 48    is_torch_xla_available,
+ 49    set_seed,
+ 50)
+ 51from transformers.testing_utils import CaptureLogger
+ 52from transformers.trainer_utils import get_last_checkpoint
+ 53from transformers.utils import check_min_version, send_example_telemetry
+ 54from transformers.utils.versions import require_version
+ 55
+ 56
+ 57# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
+ 58check_min_version("4.48.0.dev0")
+ 59
+ 60require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
+ 61
+ 62logger = logging.getLogger(__name__)
+ 63
+ 64
+ 65MODEL_CONFIG_CLASSES = list(MODEL_FOR_CAUSAL_LM_MAPPING.keys())
+ 66MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
+ 67
+ 68
+ 69@dataclass
+ 70class ModelArguments:
+ 71    """
+ 72    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
+ 73    """
+ 74
+ 75    model_name_or_path: Optional[str] = field(
+ 76        default=None,
+ 77        metadata={
+ 78            "help": (
+ 79                "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch."
+ 80            )
+ 81        },
+ 82    )
+ 83    model_type: Optional[str] = field(
+ 84        default=None,
+ 85        metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)},
+ 86    )
+ 87    config_overrides: Optional[str] = field(
+ 88        default=None,
+ 89        metadata={
+ 90            "help": (
+ 91                "Override some existing default config settings when a model is trained from scratch. Example: "
+ 92                "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
+ 93            )
+ 94        },
+ 95    )
+ 96    config_name: Optional[str] = field(
+ 97        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
+ 98    )
+ 99    tokenizer_name: Optional[str] = field(
+100        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
+101    )
+102    cache_dir: Optional[str] = field(
+103        default=None,
+104        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
+105    )
+106    use_fast_tokenizer: bool = field(
+107        default=True,
+108        metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
+109    )
+110    model_revision: str = field(
+111        default="main",
+112        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
+113    )
+114    token: str = field(
+115        default=None,
+116        metadata={
+117            "help": (
+118                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+119                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+120            )
+121        },
+122    )
+123    trust_remote_code: bool = field(
+124        default=False,
+125        metadata={
+126            "help": (
+127                "Whether to trust the execution of code from datasets/models defined on the Hub."
+128                " This option should only be set to `True` for repositories you trust and in which you have read the"
+129                " code, as it will execute code present on the Hub on your local machine."
+130            )
+131        },
+132    )
+133    torch_dtype: Optional[str] = field(
+134        default=None,
+135        metadata={
+136            "help": (
+137                "Override the default `torch.dtype` and load the model under this dtype. If `auto` is passed, the "
+138                "dtype will be automatically derived from the model's weights."
+139            ),
+140            "choices": ["auto", "bfloat16", "float16", "float32"],
+141        },
+142    )
+143    low_cpu_mem_usage: bool = field(
+144        default=False,
+145        metadata={
+146            "help": (
+147                "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
+148                "set True will benefit LLM loading time and RAM consumption."
+149            )
+150        },
+151    )
+152
+153    def __post_init__(self):
+154        if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
+155            raise ValueError(
+156                "--config_overrides can't be used in combination with --config_name or --model_name_or_path"
+157            )
+158
+159
+160@dataclass
+161class DataTrainingArguments:
+162    """
+163    Arguments pertaining to what data we are going to input our model for training and eval.
+164    """
+165
+166    dataset_name: Optional[str] = field(
+167        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
+168    )
+169    dataset_config_name: Optional[str] = field(
+170        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+171    )
+172    train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
+173    validation_file: Optional[str] = field(
+174        default=None,
+175        metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."},
+176    )
+177    max_train_samples: Optional[int] = field(
+178        default=None,
+179        metadata={
+180            "help": (
+181                "For debugging purposes or quicker training, truncate the number of training examples to this "
+182                "value if set."
+183            )
+184        },
+185    )
+186    max_eval_samples: Optional[int] = field(
+187        default=None,
+188        metadata={
+189            "help": (
+190                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
+191                "value if set."
+192            )
+193        },
+194    )
+195    streaming: bool = field(default=False, metadata={"help": "Enable streaming mode"})
+196    block_size: Optional[int] = field(
+197        default=None,
+198        metadata={
+199            "help": (
+200                "Optional input sequence length after tokenization. "
+201                "The training dataset will be truncated in block of this size for training. "
+202                "Default to the model max input length for single sentence inputs (take into account special tokens)."
+203            )
+204        },
+205    )
+206    overwrite_cache: bool = field(
+207        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
+208    )
+209    validation_split_percentage: Optional[int] = field(
+210        default=5,
+211        metadata={
+212            "help": "The percentage of the train set used as validation set in case there's no validation split"
+213        },
+214    )
+215    preprocessing_num_workers: Optional[int] = field(
+216        default=None,
+217        metadata={"help": "The number of processes to use for the preprocessing."},
+218    )
+219    keep_linebreaks: bool = field(
+220        default=True, metadata={"help": "Whether to keep line breaks when using TXT files or not."}
+221    )
+222
+223    def __post_init__(self):
+224        if self.streaming:
+225            require_version("datasets>=2.0.0", "The streaming feature requires `datasets>=2.0.0`")
+226
+227        if self.dataset_name is None and self.train_file is None and self.validation_file is None:
+228            raise ValueError("Need either a dataset name or a training/validation file.")
+229        else:
+230            if self.train_file is not None:
+231                extension = self.train_file.split(".")[-1]
+232                assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, a json or a txt file."
+233            if self.validation_file is not None:
+234                extension = self.validation_file.split(".")[-1]
+235                assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, a json or a txt file."
+236
+237
+238def main():
+239    # See all possible arguments in src/transformers/training_args.py
+240    # or by passing the --help flag to this script.
+241    # We now keep distinct sets of args, for a cleaner separation of concerns.
+242
+243    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
+244    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+245        # If we pass only one argument to the script and it's the path to a json file,
+246        # let's parse it to get our arguments.
+247        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+248    else:
+249        model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+250
+251    # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
+252    # information sent is the one passed as arguments along with your Python/PyTorch versions.
+253    send_example_telemetry("run_clm", model_args, data_args)
+254
+255    # Setup logging
+256    logging.basicConfig(
+257        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+258        datefmt="%m/%d/%Y %H:%M:%S",
+259        handlers=[logging.StreamHandler(sys.stdout)],
+260    )
+261
+262    if training_args.should_log:
+263        # The default of training_args.log_level is passive, so we set log level at info here to have that default.
+264        transformers.utils.logging.set_verbosity_info()
+265
+266    log_level = training_args.get_process_log_level()
+267    logger.setLevel(log_level)
+268    datasets.utils.logging.set_verbosity(log_level)
+269    transformers.utils.logging.set_verbosity(log_level)
+270    transformers.utils.logging.enable_default_handler()
+271    transformers.utils.logging.enable_explicit_format()
+272
+273    # Log on each process the small summary:
+274    logger.warning(
+275        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
+276        + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
+277    )
+278    logger.info(f"Training/evaluation parameters {training_args}")
+279
+280    # Detecting last checkpoint.
+281    last_checkpoint = None
+282    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
+283        last_checkpoint = get_last_checkpoint(training_args.output_dir)
+284        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
+285            raise ValueError(
+286                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
+287                "Use --overwrite_output_dir to overcome."
+288            )
+289        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
+290            logger.info(
+291                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
+292                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
+293            )
+294
+295    # Set seed before initializing model.
+296    set_seed(training_args.seed)
+297
+298    # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
+299    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
+300    # (the dataset will be downloaded automatically from the datasets Hub).
+301    #
+302    # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
+303    # 'text' is found. You can easily tweak this behavior (see below).
+304    #
+305    # In distributed training, the load_dataset function guarantee that only one local process can concurrently
+306    # download the dataset.
+307    if data_args.dataset_name is not None:
+308        # Downloading and loading a dataset from the hub.
+309        raw_datasets = load_dataset(
+310            data_args.dataset_name,
+311            data_args.dataset_config_name,
+312            cache_dir=model_args.cache_dir,
+313            token=model_args.token,
+314            streaming=data_args.streaming,
+315            trust_remote_code=model_args.trust_remote_code,
+316        )
+317        if "validation" not in raw_datasets.keys():
+318            raw_datasets["validation"] = load_dataset(
+319                data_args.dataset_name,
+320                data_args.dataset_config_name,
+321                split=f"train[:{data_args.validation_split_percentage}%]",
+322                cache_dir=model_args.cache_dir,
+323                token=model_args.token,
+324                streaming=data_args.streaming,
+325                trust_remote_code=model_args.trust_remote_code,
+326            )
+327            raw_datasets["train"] = load_dataset(
+328                data_args.dataset_name,
+329                data_args.dataset_config_name,
+330                split=f"train[{data_args.validation_split_percentage}%:]",
+331                cache_dir=model_args.cache_dir,
+332                token=model_args.token,
+333                streaming=data_args.streaming,
+334                trust_remote_code=model_args.trust_remote_code,
+335            )
+336    else:
+337        data_files = {}
+338        dataset_args = {}
+339        if data_args.train_file is not None:
+340            data_files["train"] = data_args.train_file
+341        if data_args.validation_file is not None:
+342            data_files["validation"] = data_args.validation_file
+343        extension = (
+344            data_args.train_file.split(".")[-1]
+345            if data_args.train_file is not None
+346            else data_args.validation_file.split(".")[-1]
+347        )
+348        if extension == "txt":
+349            extension = "text"
+350            dataset_args["keep_linebreaks"] = data_args.keep_linebreaks
+351        raw_datasets = load_dataset(
+352            extension,
+353            data_files=data_files,
+354            cache_dir=model_args.cache_dir,
+355            token=model_args.token,
+356            **dataset_args,
+357        )
+358        # If no validation data is there, validation_split_percentage will be used to divide the dataset.
+359        if "validation" not in raw_datasets.keys():
+360            raw_datasets["validation"] = load_dataset(
+361                extension,
+362                data_files=data_files,
+363                split=f"train[:{data_args.validation_split_percentage}%]",
+364                cache_dir=model_args.cache_dir,
+365                token=model_args.token,
+366                **dataset_args,
+367            )
+368            raw_datasets["train"] = load_dataset(
+369                extension,
+370                data_files=data_files,
+371                split=f"train[{data_args.validation_split_percentage}%:]",
+372                cache_dir=model_args.cache_dir,
+373                token=model_args.token,
+374                **dataset_args,
+375            )
+376
+377    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
+378    # https://huggingface.co/docs/datasets/loading_datasets.
+379
+380    # Load pretrained model and tokenizer
+381    #
+382    # Distributed training:
+383    # The .from_pretrained methods guarantee that only one local process can concurrently
+384    # download model & vocab.
+385
+386    config_kwargs = {
+387        "cache_dir": model_args.cache_dir,
+388        "revision": model_args.model_revision,
+389        "token": model_args.token,
+390        "trust_remote_code": model_args.trust_remote_code,
+391    }
+392    if model_args.config_name:
+393        config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
+394    elif model_args.model_name_or_path:
+395        config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
+396    else:
+397        config = CONFIG_MAPPING[model_args.model_type]()
+398        logger.warning("You are instantiating a new config instance from scratch.")
+399        if model_args.config_overrides is not None:
+400            logger.info(f"Overriding config: {model_args.config_overrides}")
+401            config.update_from_string(model_args.config_overrides)
+402            logger.info(f"New config: {config}")
+403
+404    tokenizer_kwargs = {
+405        "cache_dir": model_args.cache_dir,
+406        "use_fast": model_args.use_fast_tokenizer,
+407        "revision": model_args.model_revision,
+408        "token": model_args.token,
+409        "trust_remote_code": model_args.trust_remote_code,
+410    }
+411    if model_args.tokenizer_name:
+412        tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
+413    elif model_args.model_name_or_path:
+414        tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
+415    else:
+416        raise ValueError(
+417            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
+418            "You can do it from another script, save it, and load it from here, using --tokenizer_name."
+419        )
+420
+421    if model_args.model_name_or_path:
+422        torch_dtype = (
+423            model_args.torch_dtype
+424            if model_args.torch_dtype in ["auto", None]
+425            else getattr(torch, model_args.torch_dtype)
+426        )
+427        model = AutoModelForCausalLM.from_pretrained(
+428            model_args.model_name_or_path,
+429            from_tf=bool(".ckpt" in model_args.model_name_or_path),
+430            config=config,
+431            cache_dir=model_args.cache_dir,
+432            revision=model_args.model_revision,
+433            token=model_args.token,
+434            trust_remote_code=model_args.trust_remote_code,
+435            torch_dtype=torch_dtype,
+436            low_cpu_mem_usage=model_args.low_cpu_mem_usage,
+437        )
+438    else:
+439        model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code)
+440        n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
+441        logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
+442
+443    # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
+444    # on a small vocab and want a smaller embedding size, remove this test.
+445    embedding_size = model.get_input_embeddings().weight.shape[0]
+446    if len(tokenizer) > embedding_size:
+447        model.resize_token_embeddings(len(tokenizer))
+448
+449    # Preprocessing the datasets.
+450    # First we tokenize all the texts.
+451    if training_args.do_train:
+452        column_names = list(raw_datasets["train"].features)
+453    else:
+454        column_names = list(raw_datasets["validation"].features)
+455    text_column_name = "text" if "text" in column_names else column_names[0]
+456
+457    # since this will be pickled to avoid _LazyModule error in Hasher force logger loading before tokenize_function
+458    tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
+459
+460    def tokenize_function(examples):
+461        with CaptureLogger(tok_logger) as cl:
+462            output = tokenizer(examples[text_column_name])
+463        # clm input could be much much longer than block_size
+464        if "Token indices sequence length is longer than the" in cl.out:
+465            tok_logger.warning(
+466                "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits"
+467                " before being passed to the model."
+468            )
+469        return output
+470
+471    with training_args.main_process_first(desc="dataset map tokenization"):
+472        if not data_args.streaming:
+473            tokenized_datasets = raw_datasets.map(
+474                tokenize_function,
+475                batched=True,
+476                num_proc=data_args.preprocessing_num_workers,
+477                remove_columns=column_names,
+478                load_from_cache_file=not data_args.overwrite_cache,
+479                desc="Running tokenizer on dataset",
+480            )
+481        else:
+482            tokenized_datasets = raw_datasets.map(
+483                tokenize_function,
+484                batched=True,
+485                remove_columns=column_names,
+486            )
+487    if hasattr(config, "max_position_embeddings"):
+488        max_pos_embeddings = config.max_position_embeddings
+489    else:
+490        # Define a default value if the attribute is missing in the config.
+491        max_pos_embeddings = 1024
+492
+493    if data_args.block_size is None:
+494        block_size = tokenizer.model_max_length
+495        if block_size > max_pos_embeddings:
+496            logger.warning(
+497                f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
+498                f"Using block_size={min(1024, max_pos_embeddings)} instead. You can change that default value by passing --block_size xxx."
+499            )
+500            if max_pos_embeddings > 0:
+501                block_size = min(1024, max_pos_embeddings)
+502            else:
+503                block_size = 1024
+504    else:
+505        if data_args.block_size > tokenizer.model_max_length:
+506            logger.warning(
+507                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
+508                f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
+509            )
+510        block_size = min(data_args.block_size, tokenizer.model_max_length)
+511
+512    # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
+513    def group_texts(examples):
+514        # Concatenate all texts.
+515        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
+516        total_length = len(concatenated_examples[list(examples.keys())[0]])
+517        # We drop the small remainder, and if the total_length < block_size  we exclude this batch and return an empty dict.
+518        # We could add padding if the model supported it instead of this drop, you can customize this part to your needs.
+519        total_length = (total_length // block_size) * block_size
+520        # Split by chunks of max_len.
+521        result = {
+522            k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
+523            for k, t in concatenated_examples.items()
+524        }
+525        result["labels"] = result["input_ids"].copy()
+526        return result
+527
+528    # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder
+529    # for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower
+530    # to preprocess.
+531    #
+532    # To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
+533    # https://huggingface.co/docs/datasets/process#map
+534
+535    with training_args.main_process_first(desc="grouping texts together"):
+536        if not data_args.streaming:
+537            lm_datasets = tokenized_datasets.map(
+538                group_texts,
+539                batched=True,
+540                num_proc=data_args.preprocessing_num_workers,
+541                load_from_cache_file=not data_args.overwrite_cache,
+542                desc=f"Grouping texts in chunks of {block_size}",
+543            )
+544        else:
+545            lm_datasets = tokenized_datasets.map(
+546                group_texts,
+547                batched=True,
+548            )
+549
+550    if training_args.do_train:
+551        if "train" not in tokenized_datasets:
+552            raise ValueError("--do_train requires a train dataset")
+553        train_dataset = lm_datasets["train"]
+554        if data_args.max_train_samples is not None:
+555            max_train_samples = min(len(train_dataset), data_args.max_train_samples)
+556            train_dataset = train_dataset.select(range(max_train_samples))
+557
+558    if training_args.do_eval:
+559        if "validation" not in tokenized_datasets:
+560            raise ValueError("--do_eval requires a validation dataset")
+561        eval_dataset = lm_datasets["validation"]
+562        if data_args.max_eval_samples is not None:
+563            max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
+564            eval_dataset = eval_dataset.select(range(max_eval_samples))
+565
+566        def preprocess_logits_for_metrics(logits, labels):
+567            if isinstance(logits, tuple):
+568                # Depending on the model and config, logits may contain extra tensors,
+569                # like past_key_values, but logits always come first
+570                logits = logits[0]
+571            return logits.argmax(dim=-1)
+572
+573        metric = evaluate.load("accuracy", cache_dir=model_args.cache_dir)
+574
+575        def compute_metrics(eval_preds):
+576            preds, labels = eval_preds
+577            # preds have the same shape as the labels, after the argmax(-1) has been calculated
+578            # by preprocess_logits_for_metrics but we need to shift the labels
+579            labels = labels[:, 1:].reshape(-1)
+580            preds = preds[:, :-1].reshape(-1)
+581            return metric.compute(predictions=preds, references=labels)
+582
+583    # Initialize our Trainer
+584    trainer = Trainer(
+585        model=model,
+586        args=training_args,
+587        train_dataset=train_dataset if training_args.do_train else None,
+588        eval_dataset=eval_dataset if training_args.do_eval else None,
+589        processing_class=tokenizer,
+590        # Data collator will default to DataCollatorWithPadding, so we change it.
+591        data_collator=default_data_collator,
+592        compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
+593        preprocess_logits_for_metrics=preprocess_logits_for_metrics
+594        if training_args.do_eval and not is_torch_xla_available()
+595        else None,
+596    )
+597
+598    # Training
+599    if training_args.do_train:
+600        checkpoint = None
+601        if training_args.resume_from_checkpoint is not None:
+602            checkpoint = training_args.resume_from_checkpoint
+603        elif last_checkpoint is not None:
+604            checkpoint = last_checkpoint
+605        train_result = trainer.train(resume_from_checkpoint=checkpoint)
+606        trainer.save_model()  # Saves the tokenizer too for easy upload
+607
+608        metrics = train_result.metrics
+609
+610        max_train_samples = (
+611            data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
+612        )
+613        metrics["train_samples"] = min(max_train_samples, len(train_dataset))
+614
+615        trainer.log_metrics("train", metrics)
+616        trainer.save_metrics("train", metrics)
+617        trainer.save_state()
+618
+619    # Evaluation
+620    if training_args.do_eval:
+621        logger.info("*** Evaluate ***")
+622
+623        metrics = trainer.evaluate()
+624
+625        max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
+626        metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
+627        try:
+628            perplexity = math.exp(metrics["eval_loss"])
+629        except OverflowError:
+630            perplexity = float("inf")
+631        metrics["perplexity"] = perplexity
+632
+633        trainer.log_metrics("eval", metrics)
+634        trainer.save_metrics("eval", metrics)
+635
+636    kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-generation"}
+637    if data_args.dataset_name is not None:
+638        kwargs["dataset_tags"] = data_args.dataset_name
+639        if data_args.dataset_config_name is not None:
+640            kwargs["dataset_args"] = data_args.dataset_config_name
+641            kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
+642        else:
+643            kwargs["dataset"] = data_args.dataset_name
+644
+645    if training_args.push_to_hub:
+646        trainer.push_to_hub(**kwargs)
+647    else:
+648        trainer.create_model_card(**kwargs)
+649
+650
+651def _mp_fn(index):
+652    # For xla_spawn (TPUs)
+653    main()
+654
+655
+656if __name__ == "__main__":
+657    main()
+
+
+
1python run_clm.py \
+2    --model_name_or_path openai-community/gpt2 \
+3    --train_file path_to_train_file \
+4    --validation_file path_to_validation_file \
+5    --per_device_train_batch_size 8 \
+6    --per_device_eval_batch_size 8 \
+7    --do_train \
+8    --do_eval \
+9    --output_dir /tmp/test-clm
+
+
+
+
+
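Before running the command above, you can check that Transformers will place training on the NPU. A minimal sketch (an assumption for illustration; the printed device name presumes a single-NPU machine with `torch_npu` installed):

import torch
import torch_npu  # Ascend NPU plugin for PyTorch
from transformers import TrainingArguments

print(torch.npu.is_available())                       # True when an NPU is visible
args = TrainingArguments(output_dir="/tmp/test-clm")
print(args.device)                                    # expected: npu:0

If `args.device` resolves to an NPU device, `run_clm.py` trains on the NPU without any code changes.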

4. Fine-tuning a model with Diffusers

+

The following code fine-tunes a text-to-image model with Diffusers (from the diffusers examples). Since diffusers v0.27.0, the code automatically detects the NPU and runs the fine-tuning on it without any modification.

+
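Before launching the script below, you can check that Accelerate (which the script uses for device placement) reports the NPU as the active device. A minimal sketch (an assumption for illustration; it presumes `accelerate` and `torch_npu` are installed):

import torch
import torch_npu  # Ascend NPU plugin for PyTorch
from accelerate import Accelerator

accelerator = Accelerator()
print(accelerator.device)   # expected: npu:0 on an Ascend machine

If the device is reported as an NPU, the fine-tuning script below needs no modification.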
   1#!/usr/bin/env python
+   2# coding=utf-8
+   3# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+   4#
+   5# Licensed under the Apache License, Version 2.0 (the "License");
+   6# you may not use this file except in compliance with the License.
+   7# You may obtain a copy of the License at
+   8#
+   9#     http://www.apache.org/licenses/LICENSE-2.0
+  10#
+  11# Unless required by applicable law or agreed to in writing, software
+  12# distributed under the License is distributed on an "AS IS" BASIS,
+  13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  14# See the License for the specific language governing permissions and
+  15# limitations under the License.
+  16
+  17import argparse
+  18import logging
+  19import math
+  20import os
+  21import random
+  22import shutil
+  23from contextlib import nullcontext
+  24from pathlib import Path
+  25
+  26import accelerate
+  27import datasets
+  28import numpy as np
+  29import torch
+  30import torch.nn.functional as F
+  31import torch.utils.checkpoint
+  32import transformers
+  33from accelerate import Accelerator
+  34from accelerate.logging import get_logger
+  35from accelerate.state import AcceleratorState
+  36from accelerate.utils import ProjectConfiguration, set_seed
+  37from datasets import load_dataset
+  38from huggingface_hub import create_repo, upload_folder
+  39from packaging import version
+  40from torchvision import transforms
+  41from tqdm.auto import tqdm
+  42from transformers import CLIPTextModel, CLIPTokenizer
+  43from transformers.utils import ContextManagers
+  44
+  45import diffusers
+  46from diffusers import AutoencoderKL, DDPMScheduler, StableDiffusionPipeline, UNet2DConditionModel
+  47from diffusers.optimization import get_scheduler
+  48from diffusers.training_utils import EMAModel, compute_dream_and_update_latents, compute_snr
+  49from diffusers.utils import check_min_version, deprecate, is_wandb_available, make_image_grid
+  50from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
+  51from diffusers.utils.import_utils import is_xformers_available
+  52from diffusers.utils.torch_utils import is_compiled_module
+  53
+  54
+  55if is_wandb_available():
+  56    import wandb
+  57
+  58
+  59# Will error if the minimal version of diffusers is not installed. Remove at your own risks.
+  60check_min_version("0.32.0.dev0")
+  61
+  62logger = get_logger(__name__, log_level="INFO")
+  63
+  64DATASET_NAME_MAPPING = {
+  65    "lambdalabs/naruto-blip-captions": ("image", "text"),
+  66}
+  67
+  68
+  69def save_model_card(
+  70    args,
+  71    repo_id: str,
+  72    images: list = None,
+  73    repo_folder: str = None,
+  74):
+  75    img_str = ""
+  76    if len(images) > 0:
+  77        image_grid = make_image_grid(images, 1, len(args.validation_prompts))
+  78        image_grid.save(os.path.join(repo_folder, "val_imgs_grid.png"))
+  79        img_str += "![val_imgs_grid](./val_imgs_grid.png)\n"
+  80
+  81    model_description = f"""
+  82# Text-to-image finetuning - {repo_id}
+  83
+  84This pipeline was finetuned from **{args.pretrained_model_name_or_path}** on the **{args.dataset_name}** dataset. Below are some example images generated with the finetuned pipeline using the following prompts: {args.validation_prompts}: \n
+  85{img_str}
+  86
+  87## Pipeline usage
+  88
+  89You can use the pipeline like so:
+  90
+  91```python
+  92from diffusers import DiffusionPipeline
+  93import torch
+  94
+  95pipeline = DiffusionPipeline.from_pretrained("{repo_id}", torch_dtype=torch.float16)
+  96prompt = "{args.validation_prompts[0]}"
+  97image = pipeline(prompt).images[0]
+  98image.save("my_image.png")
+  99```
+ 100
+ 101## Training info
+ 102
+ 103These are the key hyperparameters used during training:
+ 104
+ 105* Epochs: {args.num_train_epochs}
+ 106* Learning rate: {args.learning_rate}
+ 107* Batch size: {args.train_batch_size}
+ 108* Gradient accumulation steps: {args.gradient_accumulation_steps}
+ 109* Image resolution: {args.resolution}
+ 110* Mixed-precision: {args.mixed_precision}
+ 111
+ 112"""
+ 113    wandb_info = ""
+ 114    if is_wandb_available():
+ 115        wandb_run_url = None
+ 116        if wandb.run is not None:
+ 117            wandb_run_url = wandb.run.url
+ 118
+ 119    if wandb_run_url is not None:
+ 120        wandb_info = f"""
+ 121More information on all the CLI arguments and the environment are available on your [`wandb` run page]({wandb_run_url}).
+ 122"""
+ 123
+ 124    model_description += wandb_info
+ 125
+ 126    model_card = load_or_create_model_card(
+ 127        repo_id_or_path=repo_id,
+ 128        from_training=True,
+ 129        license="creativeml-openrail-m",
+ 130        base_model=args.pretrained_model_name_or_path,
+ 131        model_description=model_description,
+ 132        inference=True,
+ 133    )
+ 134
+ 135    tags = ["stable-diffusion", "stable-diffusion-diffusers", "text-to-image", "diffusers", "diffusers-training"]
+ 136    model_card = populate_model_card(model_card, tags=tags)
+ 137
+ 138    model_card.save(os.path.join(repo_folder, "README.md"))
+ 139
+ 140
+ 141def log_validation(vae, text_encoder, tokenizer, unet, args, accelerator, weight_dtype, epoch):
+ 142    logger.info("Running validation... ")
+ 143
+ 144    pipeline = StableDiffusionPipeline.from_pretrained(
+ 145        args.pretrained_model_name_or_path,
+ 146        vae=accelerator.unwrap_model(vae),
+ 147        text_encoder=accelerator.unwrap_model(text_encoder),
+ 148        tokenizer=tokenizer,
+ 149        unet=accelerator.unwrap_model(unet),
+ 150        safety_checker=None,
+ 151        revision=args.revision,
+ 152        variant=args.variant,
+ 153        torch_dtype=weight_dtype,
+ 154    )
+ 155    pipeline = pipeline.to(accelerator.device)
+ 156    pipeline.set_progress_bar_config(disable=True)
+ 157
+ 158    if args.enable_xformers_memory_efficient_attention:
+ 159        pipeline.enable_xformers_memory_efficient_attention()
+ 160
+ 161    if args.seed is None:
+ 162        generator = None
+ 163    else:
+ 164        generator = torch.Generator(device=accelerator.device).manual_seed(args.seed)
+ 165
+ 166    images = []
+ 167    for i in range(len(args.validation_prompts)):
+ 168        if torch.backends.mps.is_available():
+ 169            autocast_ctx = nullcontext()
+ 170        else:
+ 171            autocast_ctx = torch.autocast(accelerator.device.type)
+ 172
+ 173        with autocast_ctx:
+ 174            image = pipeline(args.validation_prompts[i], num_inference_steps=20, generator=generator).images[0]
+ 175
+ 176        images.append(image)
+ 177
+ 178    for tracker in accelerator.trackers:
+ 179        if tracker.name == "tensorboard":
+ 180            np_images = np.stack([np.asarray(img) for img in images])
+ 181            tracker.writer.add_images("validation", np_images, epoch, dataformats="NHWC")
+ 182        elif tracker.name == "wandb":
+ 183            tracker.log(
+ 184                {
+ 185                    "validation": [
+ 186                        wandb.Image(image, caption=f"{i}: {args.validation_prompts[i]}")
+ 187                        for i, image in enumerate(images)
+ 188                    ]
+ 189                }
+ 190            )
+ 191        else:
+ 192            logger.warning(f"image logging not implemented for {tracker.name}")
+ 193
+ 194    del pipeline
+ 195    torch.cuda.empty_cache()
+ 196
+ 197    return images
+ 198
+ 199
+ 200def parse_args():
+ 201    parser = argparse.ArgumentParser(description="Simple example of a training script.")
+ 202    parser.add_argument(
+ 203        "--input_perturbation", type=float, default=0, help="The scale of input perturbation. Recommended 0.1."
+ 204    )
+ 205    parser.add_argument(
+ 206        "--pretrained_model_name_or_path",
+ 207        type=str,
+ 208        default=None,
+ 209        required=True,
+ 210        help="Path to pretrained model or model identifier from huggingface.co/models.",
+ 211    )
+ 212    parser.add_argument(
+ 213        "--revision",
+ 214        type=str,
+ 215        default=None,
+ 216        required=False,
+ 217        help="Revision of pretrained model identifier from huggingface.co/models.",
+ 218    )
+ 219    parser.add_argument(
+ 220        "--variant",
+ 221        type=str,
+ 222        default=None,
+ 223        help="Variant of the model files of the pretrained model identifier from huggingface.co/models, 'e.g.' fp16",
+ 224    )
+ 225    parser.add_argument(
+ 226        "--dataset_name",
+ 227        type=str,
+ 228        default=None,
+ 229        help=(
+ 230            "The name of the Dataset (from the HuggingFace hub) to train on (could be your own, possibly private,"
+ 231            " dataset). It can also be a path pointing to a local copy of a dataset in your filesystem,"
+ 232            " or to a folder containing files that 🤗 Datasets can understand."
+ 233        ),
+ 234    )
+ 235    parser.add_argument(
+ 236        "--dataset_config_name",
+ 237        type=str,
+ 238        default=None,
+ 239        help="The config of the Dataset, leave as None if there's only one config.",
+ 240    )
+ 241    parser.add_argument(
+ 242        "--train_data_dir",
+ 243        type=str,
+ 244        default=None,
+ 245        help=(
+ 246            "A folder containing the training data. Folder contents must follow the structure described in"
+ 247            " https://huggingface.co/docs/datasets/image_dataset#imagefolder. In particular, a `metadata.jsonl` file"
+ 248            " must exist to provide the captions for the images. Ignored if `dataset_name` is specified."
+ 249        ),
+ 250    )
+ 251    parser.add_argument(
+ 252        "--image_column", type=str, default="image", help="The column of the dataset containing an image."
+ 253    )
+ 254    parser.add_argument(
+ 255        "--caption_column",
+ 256        type=str,
+ 257        default="text",
+ 258        help="The column of the dataset containing a caption or a list of captions.",
+ 259    )
+ 260    parser.add_argument(
+ 261        "--max_train_samples",
+ 262        type=int,
+ 263        default=None,
+ 264        help=(
+ 265            "For debugging purposes or quicker training, truncate the number of training examples to this "
+ 266            "value if set."
+ 267        ),
+ 268    )
+ 269    parser.add_argument(
+ 270        "--validation_prompts",
+ 271        type=str,
+ 272        default=None,
+ 273        nargs="+",
+ 274        help=("A set of prompts evaluated every `--validation_epochs` and logged to `--report_to`."),
+ 275    )
+ 276    parser.add_argument(
+ 277        "--output_dir",
+ 278        type=str,
+ 279        default="sd-model-finetuned",
+ 280        help="The output directory where the model predictions and checkpoints will be written.",
+ 281    )
+ 282    parser.add_argument(
+ 283        "--cache_dir",
+ 284        type=str,
+ 285        default=None,
+ 286        help="The directory where the downloaded models and datasets will be stored.",
+ 287    )
+ 288    parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
+ 289    parser.add_argument(
+ 290        "--resolution",
+ 291        type=int,
+ 292        default=512,
+ 293        help=(
+ 294            "The resolution for input images; all images in the train/validation dataset will be resized to this"
+ 295            " resolution."
+ 296        ),
+ 297    )
+ 298    parser.add_argument(
+ 299        "--center_crop",
+ 300        default=False,
+ 301        action="store_true",
+ 302        help=(
+ 303            "Whether to center crop the input images to the resolution. If not set, the images will be randomly"
+ 304            " cropped. The images will be resized to the resolution first before cropping."
+ 305        ),
+ 306    )
+ 307    parser.add_argument(
+ 308        "--random_flip",
+ 309        action="store_true",
+ 310        help="whether to randomly flip images horizontally",
+ 311    )
+ 312    parser.add_argument(
+ 313        "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader."
+ 314    )
+ 315    parser.add_argument("--num_train_epochs", type=int, default=100)
+ 316    parser.add_argument(
+ 317        "--max_train_steps",
+ 318        type=int,
+ 319        default=None,
+ 320        help="Total number of training steps to perform.  If provided, overrides num_train_epochs.",
+ 321    )
+ 322    parser.add_argument(
+ 323        "--gradient_accumulation_steps",
+ 324        type=int,
+ 325        default=1,
+ 326        help="Number of updates steps to accumulate before performing a backward/update pass.",
+ 327    )
+ 328    parser.add_argument(
+ 329        "--gradient_checkpointing",
+ 330        action="store_true",
+ 331        help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.",
+ 332    )
+ 333    parser.add_argument(
+ 334        "--learning_rate",
+ 335        type=float,
+ 336        default=1e-4,
+ 337        help="Initial learning rate (after the potential warmup period) to use.",
+ 338    )
+ 339    parser.add_argument(
+ 340        "--scale_lr",
+ 341        action="store_true",
+ 342        default=False,
+ 343        help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.",
+ 344    )
+ 345    parser.add_argument(
+ 346        "--lr_scheduler",
+ 347        type=str,
+ 348        default="constant",
+ 349        help=(
+ 350            'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",'
+ 351            ' "constant", "constant_with_warmup"]'
+ 352        ),
+ 353    )
+ 354    parser.add_argument(
+ 355        "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler."
+ 356    )
+ 357    parser.add_argument(
+ 358        "--snr_gamma",
+ 359        type=float,
+ 360        default=None,
+ 361        help="SNR weighting gamma to be used if rebalancing the loss. Recommended value is 5.0. "
+ 362        "More details here: https://arxiv.org/abs/2303.09556.",
+ 363    )
+ 364    parser.add_argument(
+ 365        "--dream_training",
+ 366        action="store_true",
+ 367        help=(
+ 368            "Use the DREAM training method, which makes training more efficient and accurate at the "
+ 369            "expense of doing an extra forward pass. See: https://arxiv.org/abs/2312.00210"
+ 370        ),
+ 371    )
+ 372    parser.add_argument(
+ 373        "--dream_detail_preservation",
+ 374        type=float,
+ 375        default=1.0,
+ 376        help="Dream detail preservation factor p (should be greater than 0; default=1.0, as suggested in the paper)",
+ 377    )
+ 378    parser.add_argument(
+ 379        "--use_8bit_adam", action="store_true", help="Whether or not to use 8-bit Adam from bitsandbytes."
+ 380    )
+ 381    parser.add_argument(
+ 382        "--allow_tf32",
+ 383        action="store_true",
+ 384        help=(
+ 385            "Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see"
+ 386            " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices"
+ 387        ),
+ 388    )
+ 389    parser.add_argument("--use_ema", action="store_true", help="Whether to use EMA model.")
+ 390    parser.add_argument("--offload_ema", action="store_true", help="Offload EMA model to CPU during training step.")
+ 391    parser.add_argument("--foreach_ema", action="store_true", help="Use faster foreach implementation of EMAModel.")
+ 392    parser.add_argument(
+ 393        "--non_ema_revision",
+ 394        type=str,
+ 395        default=None,
+ 396        required=False,
+ 397        help=(
+ 398            "Revision of pretrained non-ema model identifier. Must be a branch, tag or git identifier of the local or"
+ 399            " remote repository specified with --pretrained_model_name_or_path."
+ 400        ),
+ 401    )
+ 402    parser.add_argument(
+ 403        "--dataloader_num_workers",
+ 404        type=int,
+ 405        default=0,
+ 406        help=(
+ 407            "Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process."
+ 408        ),
+ 409    )
+ 410    parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.")
+ 411    parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.")
+ 412    parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.")
+ 413    parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer")
+ 414    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
+ 415    parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.")
+ 416    parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.")
+ 417    parser.add_argument(
+ 418        "--prediction_type",
+ 419        type=str,
+ 420        default=None,
+ 421        help="The prediction_type that shall be used for training. Choose between 'epsilon' or 'v_prediction' or leave `None`. If left to `None` the default prediction type of the scheduler: `noise_scheduler.config.prediction_type` is chosen.",
+ 422    )
+ 423    parser.add_argument(
+ 424        "--hub_model_id",
+ 425        type=str,
+ 426        default=None,
+ 427        help="The name of the repository to keep in sync with the local `output_dir`.",
+ 428    )
+ 429    parser.add_argument(
+ 430        "--logging_dir",
+ 431        type=str,
+ 432        default="logs",
+ 433        help=(
+ 434            "[TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to"
+ 435            " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***."
+ 436        ),
+ 437    )
+ 438    parser.add_argument(
+ 439        "--mixed_precision",
+ 440        type=str,
+ 441        default=None,
+ 442        choices=["no", "fp16", "bf16"],
+ 443        help=(
+ 444            "Whether to use mixed precision. Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >="
+ 445            " 1.10 and an Nvidia Ampere GPU. Defaults to the value of the accelerate config of the current system or the"
+ 446            " flag passed with the `accelerate.launch` command. Use this argument to override the accelerate config."
+ 447        ),
+ 448    )
+ 449    parser.add_argument(
+ 450        "--report_to",
+ 451        type=str,
+ 452        default="tensorboard",
+ 453        help=(
+ 454            'The integration to report the results and logs to. Supported platforms are `"tensorboard"`'
+ 455            ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.'
+ 456        ),
+ 457    )
+ 458    parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
+ 459    parser.add_argument(
+ 460        "--checkpointing_steps",
+ 461        type=int,
+ 462        default=500,
+ 463        help=(
+ 464            "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming"
+ 465            " training using `--resume_from_checkpoint`."
+ 466        ),
+ 467    )
+ 468    parser.add_argument(
+ 469        "--checkpoints_total_limit",
+ 470        type=int,
+ 471        default=None,
+ 472        help=("Max number of checkpoints to store."),
+ 473    )
+ 474    parser.add_argument(
+ 475        "--resume_from_checkpoint",
+ 476        type=str,
+ 477        default=None,
+ 478        help=(
+ 479            "Whether training should be resumed from a previous checkpoint. Use a path saved by"
+ 480            ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.'
+ 481        ),
+ 482    )
+ 483    parser.add_argument(
+ 484        "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers."
+ 485    )
+ 486    parser.add_argument("--noise_offset", type=float, default=0, help="The scale of noise offset.")
+ 487    parser.add_argument(
+ 488        "--validation_epochs",
+ 489        type=int,
+ 490        default=5,
+ 491        help="Run validation every X epochs.",
+ 492    )
+ 493    parser.add_argument(
+ 494        "--tracker_project_name",
+ 495        type=str,
+ 496        default="text2image-fine-tune",
+ 497        help=(
+ 498            "The `project_name` argument passed to Accelerator.init_trackers. For"
+ 499            " more information, see https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
+ 500        ),
+ 501    )
+ 502
+ 503    args = parser.parse_args()
+ 504    env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
+ 505    if env_local_rank != -1 and env_local_rank != args.local_rank:
+ 506        args.local_rank = env_local_rank
+ 507
+ 508    # Sanity checks
+ 509    if args.dataset_name is None and args.train_data_dir is None:
+ 510        raise ValueError("Need either a dataset name or a training folder.")
+ 511
+ 512    # default to using the same revision for the non-ema model if not specified
+ 513    if args.non_ema_revision is None:
+ 514        args.non_ema_revision = args.revision
+ 515
+ 516    return args
+ 517
+ 518
+ 519def main():
+ 520    args = parse_args()
+ 521
+ 522    if args.report_to == "wandb" and args.hub_token is not None:
+ 523        raise ValueError(
+ 524            "You cannot use both --report_to=wandb and --hub_token due to a security risk of exposing your token."
+ 525            " Please use `huggingface-cli login` to authenticate with the Hub."
+ 526        )
+ 527
+ 528    if args.non_ema_revision is not None:
+ 529        deprecate(
+ 530            "non_ema_revision!=None",
+ 531            "0.15.0",
+ 532            message=(
+ 533                "Downloading 'non_ema' weights from revision branches of the Hub is deprecated. Please make sure to"
+ 534                " use `--variant=non_ema` instead."
+ 535            ),
+ 536        )
+ 537    logging_dir = os.path.join(args.output_dir, args.logging_dir)
+ 538
+ 539    accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)
+ 540
+ 541    accelerator = Accelerator(
+ 542        gradient_accumulation_steps=args.gradient_accumulation_steps,
+ 543        mixed_precision=args.mixed_precision,
+ 544        log_with=args.report_to,
+ 545        project_config=accelerator_project_config,
+ 546    )
+ 547
+ 548    # Disable AMP for MPS.
+ 549    if torch.backends.mps.is_available():
+ 550        accelerator.native_amp = False
+ 551
+ 552    # Make one log on every process with the configuration for debugging.
+ 553    logging.basicConfig(
+ 554        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+ 555        datefmt="%m/%d/%Y %H:%M:%S",
+ 556        level=logging.INFO,
+ 557    )
+ 558    logger.info(accelerator.state, main_process_only=False)
+ 559    if accelerator.is_local_main_process:
+ 560        datasets.utils.logging.set_verbosity_warning()
+ 561        transformers.utils.logging.set_verbosity_warning()
+ 562        diffusers.utils.logging.set_verbosity_info()
+ 563    else:
+ 564        datasets.utils.logging.set_verbosity_error()
+ 565        transformers.utils.logging.set_verbosity_error()
+ 566        diffusers.utils.logging.set_verbosity_error()
+ 567
+ 568    # If passed along, set the training seed now.
+ 569    if args.seed is not None:
+ 570        set_seed(args.seed)
+ 571
+ 572    # Handle the repository creation
+ 573    if accelerator.is_main_process:
+ 574        if args.output_dir is not None:
+ 575            os.makedirs(args.output_dir, exist_ok=True)
+ 576
+ 577        if args.push_to_hub:
+ 578            repo_id = create_repo(
+ 579                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+ 580            ).repo_id
+ 581
+ 582    # Load scheduler, tokenizer and models.
+ 583    noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
+ 584    tokenizer = CLIPTokenizer.from_pretrained(
+ 585        args.pretrained_model_name_or_path, subfolder="tokenizer", revision=args.revision
+ 586    )
+ 587
+ 588    def deepspeed_zero_init_disabled_context_manager():
+ 589        """
+ 590        returns either a context list that includes one that will disable zero.Init or an empty context list
+ 591        """
+ 592        deepspeed_plugin = AcceleratorState().deepspeed_plugin if accelerate.state.is_initialized() else None
+ 593        if deepspeed_plugin is None:
+ 594            return []
+ 595
+ 596        return [deepspeed_plugin.zero3_init_context_manager(enable=False)]
+ 597
+ 598    # Currently Accelerate doesn't know how to handle multiple models under Deepspeed ZeRO stage 3.
+ 599    # For this to work properly all models must be run through `accelerate.prepare`. But accelerate
+ 600    # will try to assign the same optimizer with the same weights to all models during
+ 601    # `deepspeed.initialize`, which of course doesn't work.
+ 602    #
+ 603    # For now the following workaround will partially support Deepspeed ZeRO-3, by excluding the 2
+ 604    # frozen models from being partitioned during `zero.Init` which gets called during
+ 605    # `from_pretrained`. So CLIPTextModel and AutoencoderKL will not enjoy the parameter sharding
+ 606    # across multiple gpus and only UNet2DConditionModel will get ZeRO sharded.
+ 607    with ContextManagers(deepspeed_zero_init_disabled_context_manager()):
+ 608        text_encoder = CLIPTextModel.from_pretrained(
+ 609            args.pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision, variant=args.variant
+ 610        )
+ 611        vae = AutoencoderKL.from_pretrained(
+ 612            args.pretrained_model_name_or_path, subfolder="vae", revision=args.revision, variant=args.variant
+ 613        )
+ 614
+ 615    unet = UNet2DConditionModel.from_pretrained(
+ 616        args.pretrained_model_name_or_path, subfolder="unet", revision=args.non_ema_revision
+ 617    )
+ 618
+ 619    # Freeze vae and text_encoder and set unet to trainable
+ 620    vae.requires_grad_(False)
+ 621    text_encoder.requires_grad_(False)
+ 622    unet.train()
+ 623
+ 624    # Create EMA for the unet.
+ 625    if args.use_ema:
+ 626        ema_unet = UNet2DConditionModel.from_pretrained(
+ 627            args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision, variant=args.variant
+ 628        )
+ 629        ema_unet = EMAModel(
+ 630            ema_unet.parameters(),
+ 631            model_cls=UNet2DConditionModel,
+ 632            model_config=ema_unet.config,
+ 633            foreach=args.foreach_ema,
+ 634        )
+ 635
+ 636    if args.enable_xformers_memory_efficient_attention:
+ 637        if is_xformers_available():
+ 638            import xformers
+ 639
+ 640            xformers_version = version.parse(xformers.__version__)
+ 641            if xformers_version == version.parse("0.0.16"):
+ 642                logger.warning(
+ 643                    "xFormers 0.0.16 cannot be used for training in some GPUs. If you observe problems during training, please update xFormers to at least 0.0.17. See https://huggingface.co/docs/diffusers/main/en/optimization/xformers for more details."
+ 644                )
+ 645            unet.enable_xformers_memory_efficient_attention()
+ 646        else:
+ 647            raise ValueError("xformers is not available. Make sure it is installed correctly")
+ 648
+ 649    # `accelerate` 0.16.0 will have better support for customized saving
+ 650    if version.parse(accelerate.__version__) >= version.parse("0.16.0"):
+ 651        # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format
+ 652        def save_model_hook(models, weights, output_dir):
+ 653            if accelerator.is_main_process:
+ 654                if args.use_ema:
+ 655                    ema_unet.save_pretrained(os.path.join(output_dir, "unet_ema"))
+ 656
+ 657                for i, model in enumerate(models):
+ 658                    model.save_pretrained(os.path.join(output_dir, "unet"))
+ 659
+ 660                    # make sure to pop weight so that corresponding model is not saved again
+ 661                    weights.pop()
+ 662
+ 663        def load_model_hook(models, input_dir):
+ 664            if args.use_ema:
+ 665                load_model = EMAModel.from_pretrained(
+ 666                    os.path.join(input_dir, "unet_ema"), UNet2DConditionModel, foreach=args.foreach_ema
+ 667                )
+ 668                ema_unet.load_state_dict(load_model.state_dict())
+ 669                if args.offload_ema:
+ 670                    ema_unet.pin_memory()
+ 671                else:
+ 672                    ema_unet.to(accelerator.device)
+ 673                del load_model
+ 674
+ 675            for _ in range(len(models)):
+ 676                # pop models so that they are not loaded again
+ 677                model = models.pop()
+ 678
+ 679                # load diffusers style into model
+ 680                load_model = UNet2DConditionModel.from_pretrained(input_dir, subfolder="unet")
+ 681                model.register_to_config(**load_model.config)
+ 682
+ 683                model.load_state_dict(load_model.state_dict())
+ 684                del load_model
+ 685
+ 686        accelerator.register_save_state_pre_hook(save_model_hook)
+ 687        accelerator.register_load_state_pre_hook(load_model_hook)
+ 688
+ 689    if args.gradient_checkpointing:
+ 690        unet.enable_gradient_checkpointing()
+ 691
+ 692    # Enable TF32 for faster training on Ampere GPUs,
+ 693    # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
+ 694    if args.allow_tf32:
+ 695        torch.backends.cuda.matmul.allow_tf32 = True
+ 696
+ 697    if args.scale_lr:
+ 698        args.learning_rate = (
+ 699            args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes
+ 700        )
+ 701
+ 702    # Initialize the optimizer
+ 703    if args.use_8bit_adam:
+ 704        try:
+ 705            import bitsandbytes as bnb
+ 706        except ImportError:
+ 707            raise ImportError(
+ 708                "Please install bitsandbytes to use 8-bit Adam. You can do so by running `pip install bitsandbytes`"
+ 709            )
+ 710
+ 711        optimizer_cls = bnb.optim.AdamW8bit
+ 712    else:
+ 713        optimizer_cls = torch.optim.AdamW
+ 714
+ 715    optimizer = optimizer_cls(
+ 716        unet.parameters(),
+ 717        lr=args.learning_rate,
+ 718        betas=(args.adam_beta1, args.adam_beta2),
+ 719        weight_decay=args.adam_weight_decay,
+ 720        eps=args.adam_epsilon,
+ 721    )
+ 722
+ 723    # Get the datasets: you can either provide your own training and evaluation files (see below)
+ 724    # or specify a Dataset from the hub (the dataset will be downloaded automatically from the datasets Hub).
+ 725
+ 726    # In distributed training, the load_dataset function guarantees that only one local process can concurrently
+ 727    # download the dataset.
+ 728    if args.dataset_name is not None:
+ 729        # Downloading and loading a dataset from the hub.
+ 730        dataset = load_dataset(
+ 731            args.dataset_name,
+ 732            args.dataset_config_name,
+ 733            cache_dir=args.cache_dir,
+ 734            data_dir=args.train_data_dir,
+ 735        )
+ 736    else:
+ 737        data_files = {}
+ 738        if args.train_data_dir is not None:
+ 739            data_files["train"] = os.path.join(args.train_data_dir, "**")
+ 740        dataset = load_dataset(
+ 741            "imagefolder",
+ 742            data_files=data_files,
+ 743            cache_dir=args.cache_dir,
+ 744        )
+ 745        # See more about loading custom images at
+ 746        # https://huggingface.co/docs/datasets/v2.4.0/en/image_load#imagefolder
+ 747
+ 748    # Preprocessing the datasets.
+ 749    # We need to tokenize inputs and targets.
+ 750    column_names = dataset["train"].column_names
+ 751
+ 752    # 6. Get the column names for input/target.
+ 753    dataset_columns = DATASET_NAME_MAPPING.get(args.dataset_name, None)
+ 754    if args.image_column is None:
+ 755        image_column = dataset_columns[0] if dataset_columns is not None else column_names[0]
+ 756    else:
+ 757        image_column = args.image_column
+ 758        if image_column not in column_names:
+ 759            raise ValueError(
+ 760                f"`--image_column` value '{args.image_column}' needs to be one of: {', '.join(column_names)}"
+ 761            )
+ 762    if args.caption_column is None:
+ 763        caption_column = dataset_columns[1] if dataset_columns is not None else column_names[1]
+ 764    else:
+ 765        caption_column = args.caption_column
+ 766        if caption_column not in column_names:
+ 767            raise ValueError(
+ 768                f"`--caption_column` value '{args.caption_column}' needs to be one of: {', '.join(column_names)}"
+ 769            )
+ 770
+ 771    # Preprocessing the datasets.
+ 772    # We need to tokenize input captions and transform the images.
+ 773    def tokenize_captions(examples, is_train=True):
+ 774        captions = []
+ 775        for caption in examples[caption_column]:
+ 776            if isinstance(caption, str):
+ 777                captions.append(caption)
+ 778            elif isinstance(caption, (list, np.ndarray)):
+ 779                # take a random caption if there are multiple
+ 780                captions.append(random.choice(caption) if is_train else caption[0])
+ 781            else:
+ 782                raise ValueError(
+ 783                    f"Caption column `{caption_column}` should contain either strings or lists of strings."
+ 784                )
+ 785        inputs = tokenizer(
+ 786            captions, max_length=tokenizer.model_max_length, padding="max_length", truncation=True, return_tensors="pt"
+ 787        )
+ 788        return inputs.input_ids
+ 789
+ 790    # Preprocessing the datasets.
+ 791    train_transforms = transforms.Compose(
+ 792        [
+ 793            transforms.Resize(args.resolution, interpolation=transforms.InterpolationMode.BILINEAR),
+ 794            transforms.CenterCrop(args.resolution) if args.center_crop else transforms.RandomCrop(args.resolution),
+ 795            transforms.RandomHorizontalFlip() if args.random_flip else transforms.Lambda(lambda x: x),
+ 796            transforms.ToTensor(),
+ 797            transforms.Normalize([0.5], [0.5]),
+ 798        ]
+ 799    )
+ 800
+ 801    def preprocess_train(examples):
+ 802        images = [image.convert("RGB") for image in examples[image_column]]
+ 803        examples["pixel_values"] = [train_transforms(image) for image in images]
+ 804        examples["input_ids"] = tokenize_captions(examples)
+ 805        return examples
+ 806
+ 807    with accelerator.main_process_first():
+ 808        if args.max_train_samples is not None:
+ 809            dataset["train"] = dataset["train"].shuffle(seed=args.seed).select(range(args.max_train_samples))
+ 810        # Set the training transforms
+ 811        train_dataset = dataset["train"].with_transform(preprocess_train)
+ 812
+ 813    def collate_fn(examples):
+ 814        pixel_values = torch.stack([example["pixel_values"] for example in examples])
+ 815        pixel_values = pixel_values.to(memory_format=torch.contiguous_format).float()
+ 816        input_ids = torch.stack([example["input_ids"] for example in examples])
+ 817        return {"pixel_values": pixel_values, "input_ids": input_ids}
+ 818
+ 819    # DataLoaders creation:
+ 820    train_dataloader = torch.utils.data.DataLoader(
+ 821        train_dataset,
+ 822        shuffle=True,
+ 823        collate_fn=collate_fn,
+ 824        batch_size=args.train_batch_size,
+ 825        num_workers=args.dataloader_num_workers,
+ 826    )
+ 827
+ 828    # Scheduler and math around the number of training steps.
+ 829    # Check the PR https://github.com/huggingface/diffusers/pull/8312 for detailed explanation.
+ 830    num_warmup_steps_for_scheduler = args.lr_warmup_steps * accelerator.num_processes
+ 831    if args.max_train_steps is None:
+ 832        len_train_dataloader_after_sharding = math.ceil(len(train_dataloader) / accelerator.num_processes)
+ 833        num_update_steps_per_epoch = math.ceil(len_train_dataloader_after_sharding / args.gradient_accumulation_steps)
+ 834        num_training_steps_for_scheduler = (
+ 835            args.num_train_epochs * num_update_steps_per_epoch * accelerator.num_processes
+ 836        )
+ 837    else:
+ 838        num_training_steps_for_scheduler = args.max_train_steps * accelerator.num_processes
+ 839
+ 840    lr_scheduler = get_scheduler(
+ 841        args.lr_scheduler,
+ 842        optimizer=optimizer,
+ 843        num_warmup_steps=num_warmup_steps_for_scheduler,
+ 844        num_training_steps=num_training_steps_for_scheduler,
+ 845    )
+ 846
+ 847    # Prepare everything with our `accelerator`.
+ 848    unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
+ 849        unet, optimizer, train_dataloader, lr_scheduler
+ 850    )
+ 851
+ 852    if args.use_ema:
+ 853        if args.offload_ema:
+ 854            ema_unet.pin_memory()
+ 855        else:
+ 856            ema_unet.to(accelerator.device)
+ 857
+ 858    # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision
+ 859    # as these weights are only used for inference, keeping weights in full precision is not required.
+ 860    weight_dtype = torch.float32
+ 861    if accelerator.mixed_precision == "fp16":
+ 862        weight_dtype = torch.float16
+ 863        args.mixed_precision = accelerator.mixed_precision
+ 864    elif accelerator.mixed_precision == "bf16":
+ 865        weight_dtype = torch.bfloat16
+ 866        args.mixed_precision = accelerator.mixed_precision
+ 867
+ 868    # Move text_encoder and vae to the accelerator device and cast to weight_dtype
+ 869    text_encoder.to(accelerator.device, dtype=weight_dtype)
+ 870    vae.to(accelerator.device, dtype=weight_dtype)
+ 871
+ 872    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
+ 873    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
+ 874    if args.max_train_steps is None:
+ 875        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
+ 876        if num_training_steps_for_scheduler != args.max_train_steps * accelerator.num_processes:
+ 877            logger.warning(
+ 878                f"The length of the 'train_dataloader' after 'accelerator.prepare' ({len(train_dataloader)}) does not match "
+ 879                f"the expected length ({len_train_dataloader_after_sharding}) when the learning rate scheduler was created. "
+ 880                f"This inconsistency may result in the learning rate scheduler not functioning properly."
+ 881            )
+ 882    # Afterwards we recalculate our number of training epochs
+ 883    args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
+ 884
+ 885    # We need to initialize the trackers we use, and also store our configuration.
+ 886    # The trackers initialize automatically on the main process.
+ 887    if accelerator.is_main_process:
+ 888        tracker_config = dict(vars(args))
+ 889        tracker_config.pop("validation_prompts")
+ 890        accelerator.init_trackers(args.tracker_project_name, tracker_config)
+ 891
+ 892    # Function for unwrapping if model was compiled with `torch.compile`.
+ 893    def unwrap_model(model):
+ 894        model = accelerator.unwrap_model(model)
+ 895        model = model._orig_mod if is_compiled_module(model) else model
+ 896        return model
+ 897
+ 898    # Train!
+ 899    total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
+ 900
+ 901    logger.info("***** Running training *****")
+ 902    logger.info(f"  Num examples = {len(train_dataset)}")
+ 903    logger.info(f"  Num Epochs = {args.num_train_epochs}")
+ 904    logger.info(f"  Instantaneous batch size per device = {args.train_batch_size}")
+ 905    logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
+ 906    logger.info(f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
+ 907    logger.info(f"  Total optimization steps = {args.max_train_steps}")
+ 908    global_step = 0
+ 909    first_epoch = 0
+ 910
+ 911    # Potentially load in the weights and states from a previous save
+ 912    if args.resume_from_checkpoint:
+ 913        if args.resume_from_checkpoint != "latest":
+ 914            path = os.path.basename(args.resume_from_checkpoint)
+ 915        else:
+ 916            # Get the most recent checkpoint
+ 917            dirs = os.listdir(args.output_dir)
+ 918            dirs = [d for d in dirs if d.startswith("checkpoint")]
+ 919            dirs = sorted(dirs, key=lambda x: int(x.split("-")[1]))
+ 920            path = dirs[-1] if len(dirs) > 0 else None
+ 921
+ 922        if path is None:
+ 923            accelerator.print(
+ 924                f"Checkpoint '{args.resume_from_checkpoint}' does not exist. Starting a new training run."
+ 925            )
+ 926            args.resume_from_checkpoint = None
+ 927            initial_global_step = 0
+ 928        else:
+ 929            accelerator.print(f"Resuming from checkpoint {path}")
+ 930            accelerator.load_state(os.path.join(args.output_dir, path))
+ 931            global_step = int(path.split("-")[1])
+ 932
+ 933            initial_global_step = global_step
+ 934            first_epoch = global_step // num_update_steps_per_epoch
+ 935
+ 936    else:
+ 937        initial_global_step = 0
+ 938
+ 939    progress_bar = tqdm(
+ 940        range(0, args.max_train_steps),
+ 941        initial=initial_global_step,
+ 942        desc="Steps",
+ 943        # Only show the progress bar once on each machine.
+ 944        disable=not accelerator.is_local_main_process,
+ 945    )
+ 946
+ 947    for epoch in range(first_epoch, args.num_train_epochs):
+ 948        train_loss = 0.0
+ 949        for step, batch in enumerate(train_dataloader):
+ 950            with accelerator.accumulate(unet):
+ 951                # Convert images to latent space
+ 952                latents = vae.encode(batch["pixel_values"].to(weight_dtype)).latent_dist.sample()
+ 953                latents = latents * vae.config.scaling_factor
+ 954
+ 955                # Sample noise that we'll add to the latents
+ 956                noise = torch.randn_like(latents)
+ 957                if args.noise_offset:
+ 958                    # https://www.crosslabs.org//blog/diffusion-with-offset-noise
+ 959                    noise += args.noise_offset * torch.randn(
+ 960                        (latents.shape[0], latents.shape[1], 1, 1), device=latents.device
+ 961                    )
+ 962                if args.input_perturbation:
+ 963                    new_noise = noise + args.input_perturbation * torch.randn_like(noise)
+ 964                bsz = latents.shape[0]
+ 965                # Sample a random timestep for each image
+ 966                timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device)
+ 967                timesteps = timesteps.long()
+ 968
+ 969                # Add noise to the latents according to the noise magnitude at each timestep
+ 970                # (this is the forward diffusion process)
+ 971                if args.input_perturbation:
+ 972                    noisy_latents = noise_scheduler.add_noise(latents, new_noise, timesteps)
+ 973                else:
+ 974                    noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
+ 975
+ 976                # Get the text embedding for conditioning
+ 977                encoder_hidden_states = text_encoder(batch["input_ids"], return_dict=False)[0]
+ 978
+ 979                # Get the target for loss depending on the prediction type
+ 980                if args.prediction_type is not None:
+ 981                    # set prediction_type of scheduler if defined
+ 982                    noise_scheduler.register_to_config(prediction_type=args.prediction_type)
+ 983
+ 984                if noise_scheduler.config.prediction_type == "epsilon":
+ 985                    target = noise
+ 986                elif noise_scheduler.config.prediction_type == "v_prediction":
+ 987                    target = noise_scheduler.get_velocity(latents, noise, timesteps)
+ 988                else:
+ 989                    raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")
+ 990
+ 991                if args.dream_training:
+ 992                    noisy_latents, target = compute_dream_and_update_latents(
+ 993                        unet,
+ 994                        noise_scheduler,
+ 995                        timesteps,
+ 996                        noise,
+ 997                        noisy_latents,
+ 998                        target,
+ 999                        encoder_hidden_states,
+1000                        args.dream_detail_preservation,
+1001                    )
+1002
+1003                # Predict the noise residual and compute loss
+1004                model_pred = unet(noisy_latents, timesteps, encoder_hidden_states, return_dict=False)[0]
+1005
+1006                if args.snr_gamma is None:
+1007                    loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
+1008                else:
+1009                    # Compute loss-weights as per Section 3.4 of https://arxiv.org/abs/2303.09556.
+1010                    # Since we predict the noise instead of x_0, the original formulation is slightly changed.
+1011                    # This is discussed in Section 4.2 of the same paper.
+1012                    snr = compute_snr(noise_scheduler, timesteps)
+1013                    mse_loss_weights = torch.stack([snr, args.snr_gamma * torch.ones_like(timesteps)], dim=1).min(
+1014                        dim=1
+1015                    )[0]
+1016                    if noise_scheduler.config.prediction_type == "epsilon":
+1017                        mse_loss_weights = mse_loss_weights / snr
+1018                    elif noise_scheduler.config.prediction_type == "v_prediction":
+1019                        mse_loss_weights = mse_loss_weights / (snr + 1)
+1020
+1021                    loss = F.mse_loss(model_pred.float(), target.float(), reduction="none")
+1022                    loss = loss.mean(dim=list(range(1, len(loss.shape)))) * mse_loss_weights
+1023                    loss = loss.mean()
+1024
+1025                # Gather the losses across all processes for logging (if we use distributed training).
+1026                avg_loss = accelerator.gather(loss.repeat(args.train_batch_size)).mean()
+1027                train_loss += avg_loss.item() / args.gradient_accumulation_steps
+1028
+1029                # Backpropagate
+1030                accelerator.backward(loss)
+1031                if accelerator.sync_gradients:
+1032                    accelerator.clip_grad_norm_(unet.parameters(), args.max_grad_norm)
+1033                optimizer.step()
+1034                lr_scheduler.step()
+1035                optimizer.zero_grad()
+1036
+1037            # Checks if the accelerator has performed an optimization step behind the scenes
+1038            if accelerator.sync_gradients:
+1039                if args.use_ema:
+1040                    if args.offload_ema:
+1041                        ema_unet.to(device="cuda", non_blocking=True)
+1042                    ema_unet.step(unet.parameters())
+1043                    if args.offload_ema:
+1044                        ema_unet.to(device="cpu", non_blocking=True)
+1045                progress_bar.update(1)
+1046                global_step += 1
+1047                accelerator.log({"train_loss": train_loss}, step=global_step)
+1048                train_loss = 0.0
+1049
+1050                if global_step % args.checkpointing_steps == 0:
+1051                    if accelerator.is_main_process:
+1052                        # _before_ saving state, check if this save would set us over the `checkpoints_total_limit`
+1053                        if args.checkpoints_total_limit is not None:
+1054                            checkpoints = os.listdir(args.output_dir)
+1055                            checkpoints = [d for d in checkpoints if d.startswith("checkpoint")]
+1056                            checkpoints = sorted(checkpoints, key=lambda x: int(x.split("-")[1]))
+1057
+1058                            # before we save the new checkpoint, we need to have at _most_ `checkpoints_total_limit - 1` checkpoints
+1059                            if len(checkpoints) >= args.checkpoints_total_limit:
+1060                                num_to_remove = len(checkpoints) - args.checkpoints_total_limit + 1
+1061                                removing_checkpoints = checkpoints[0:num_to_remove]
+1062
+1063                                logger.info(
+1064                                    f"{len(checkpoints)} checkpoints already exist, removing {len(removing_checkpoints)} checkpoints"
+1065                                )
+1066                                logger.info(f"removing checkpoints: {', '.join(removing_checkpoints)}")
+1067
+1068                                for removing_checkpoint in removing_checkpoints:
+1069                                    removing_checkpoint = os.path.join(args.output_dir, removing_checkpoint)
+1070                                    shutil.rmtree(removing_checkpoint)
+1071
+1072                        save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}")
+1073                        accelerator.save_state(save_path)
+1074                        logger.info(f"Saved state to {save_path}")
+1075
+1076            logs = {"step_loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]}
+1077            progress_bar.set_postfix(**logs)
+1078
+1079            if global_step >= args.max_train_steps:
+1080                break
+1081
+1082        if accelerator.is_main_process:
+1083            if args.validation_prompts is not None and epoch % args.validation_epochs == 0:
+1084                if args.use_ema:
+1085                    # Store the UNet parameters temporarily and load the EMA parameters to perform inference.
+1086                    ema_unet.store(unet.parameters())
+1087                    ema_unet.copy_to(unet.parameters())
+1088                log_validation(
+1089                    vae,
+1090                    text_encoder,
+1091                    tokenizer,
+1092                    unet,
+1093                    args,
+1094                    accelerator,
+1095                    weight_dtype,
+1096                    global_step,
+1097                )
+1098                if args.use_ema:
+1099                    # Switch back to the original UNet parameters.
+1100                    ema_unet.restore(unet.parameters())
+1101
+1102    # Create the pipeline using the trained modules and save it.
+1103    accelerator.wait_for_everyone()
+1104    if accelerator.is_main_process:
+1105        unet = unwrap_model(unet)
+1106        if args.use_ema:
+1107            ema_unet.copy_to(unet.parameters())
+1108
+1109        pipeline = StableDiffusionPipeline.from_pretrained(
+1110            args.pretrained_model_name_or_path,
+1111            text_encoder=text_encoder,
+1112            vae=vae,
+1113            unet=unet,
+1114            revision=args.revision,
+1115            variant=args.variant,
+1116        )
+1117        pipeline.save_pretrained(args.output_dir)
+1118
+1119        # Run a final round of inference.
+1120        images = []
+1121        if args.validation_prompts is not None:
+1122            logger.info("Running inference for collecting generated images...")
+1123            pipeline = pipeline.to(accelerator.device)
+1124            pipeline.torch_dtype = weight_dtype
+1125            pipeline.set_progress_bar_config(disable=True)
+1126
+1127            if args.enable_xformers_memory_efficient_attention:
+1128                pipeline.enable_xformers_memory_efficient_attention()
+1129
+1130            if args.seed is None:
+1131                generator = None
+1132            else:
+1133                generator = torch.Generator(device=accelerator.device).manual_seed(args.seed)
+1134
+1135            for i in range(len(args.validation_prompts)):
+1136                with torch.autocast("cuda"):
+1137                    image = pipeline(args.validation_prompts[i], num_inference_steps=20, generator=generator).images[0]
+1138                images.append(image)
+1139
+1140        if args.push_to_hub:
+1141            save_model_card(args, repo_id, images, repo_folder=args.output_dir)
+1142            upload_folder(
+1143                repo_id=repo_id,
+1144                folder_path=args.output_dir,
+1145                commit_message="End of training",
+1146                ignore_patterns=["step_*", "epoch_*"],
+1147            )
+1148
+1149    accelerator.end_training()
+1150
+1151
+1152if __name__ == "__main__":
+1153    main()
+
+
+
 1export MODEL_NAME="CompVis/stable-diffusion-v1-4"
+ 2export DATASET_NAME="lambdalabs/naruto-blip-captions"
+ 3
+ 4accelerate launch --mixed_precision="fp16"  train_text_to_image.py \
+ 5--pretrained_model_name_or_path=$MODEL_NAME \
+ 6--dataset_name=$DATASET_NAME \
+ 7--use_ema \
+ 8--resolution=512 --center_crop --random_flip \
+ 9--train_batch_size=1 \
+10--gradient_accumulation_steps=4 \
+11--gradient_checkpointing \
+12--max_train_steps=15000 \
+13--learning_rate=1e-05 \
+14--max_grad_norm=1 \
+15--lr_scheduler="constant" --lr_warmup_steps=0 \
+16--output_dir="sd-pokemon-model"
+
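训练结束后,上面的脚本会通过 pipeline.save_pretrained(args.output_dir) 把完整的 Stable Diffusion pipeline 保存到 --output_dir 指定的目录。下面给出一个加载该目录并进行推理的最小示例草稿(非原文内容:其中的提示词、生成步数与 NPU 设备选择均为示例假设,昇腾环境下需已安装 torch_npu):
+
import torch
+import torch_npu  # 示例假设:在昇腾 NPU 上推理前需要先导入 torch_npu
+from diffusers import StableDiffusionPipeline
+
+# 加载训练命令中 --output_dir 指定的目录(即上例中的 "sd-pokemon-model")
+pipeline = StableDiffusionPipeline.from_pretrained("sd-pokemon-model", torch_dtype=torch.float16)
+pipeline = pipeline.to("npu")  # GPU 环境可改为 "cuda"
+
+# 使用与训练数据风格一致的提示词生成一张示例图片
+image = pipeline("a naruto character with blue eyes", num_inference_steps=20).images[0]
+image.save("sample.png")
+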
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/sd_webui/index.html b/sources/sd_webui/index.html new file mode 100644 index 0000000..3399d83 --- /dev/null +++ b/sources/sd_webui/index.html @@ -0,0 +1,158 @@ + + + + + + + + + Stable-Diffusion-WebUI — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/sd_webui/install.html b/sources/sd_webui/install.html new file mode 100644 index 0000000..6d05292 --- /dev/null +++ b/sources/sd_webui/install.html @@ -0,0 +1,212 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本文面向昇腾开发者,帮助开发者完成 stable-diffusion-webui 在昇腾上的安装。

+
+

备注

+

请确保环境安装了对应的固件和驱动,详情请参考 快速安装昇腾环境

+
+
+

安装miniconda

+
1mkdir -p ~/miniconda3
+2wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh -O ~/miniconda3/miniconda.sh
+3bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
+4rm -rf ~/miniconda3/miniconda.sh
+5~/miniconda3/bin/conda init bash
+6~/miniconda3/bin/conda init zsh
+
+
+
+
+

使用conda创建环境

+
1conda create -n python310 python=3.10.6
+2conda activate python310
+
+
+
+
+

安装stable-diffusion-webui

+
    +
  • 自动安装命令如下:

  • +
+
1git clone --branch dev https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
+2cd stable-diffusion-webui
+3
+4# 此命令将在首次安装时自动在 Ascend 设备上安装 torch 和 torch_npu。
+5./webui.sh --listen --skip-torch-cuda-test --no-half
+
+
+
    +
  • 手动安装:

  • +
+
 1# install stable-diffusion-webui
+ 2git clone --branch dev https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
+ 3cd stable-diffusion-webui
+ 4python -m venv venv
+ 5source ./venv/bin/activate
+ 6pip install torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+ 7pip install torch_npu==2.1.0
+ 8pip install https://github.com/openai/CLIP/archive/d50d76daa670286dd6cacf3bcd80b5e4823fc8e1.zip --prefer-binary
+ 9pip install https://github.com/mlfoundations/open_clip/archive/bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b.zip
+10pip install -U -I --no-deps xformers==0.0.23.post1
+11pip install ngrok
+12mkdir repositories
+13git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui-assets.git repositories/stable-diffusion-webui-assets
+14git -C repositories/stable-diffusion-webui-assets checkout 6f7db241d2f8ba7457bac5ca9753331f0c266917
+15git clone https://github.com/Stability-AI/stablediffusion.git repositories/stable-diffusion-stability-ai
+16git -C repositories/stable-diffusion-stability-ai checkout cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf
+17git clone https://github.com/Stability-AI/generative-models.git repositories/generative-models
+18git -C repositories/generative-models checkout 45c443b316737a4ab6e40413d7794a7f5657c19f
+19git clone https://github.com/crowsonkb/k-diffusion.git repositories/k-diffusion
+20git -C repositories/k-diffusion checkout ab527a9a6d347f364e3d185ba6d714e22d80cb3c
+21git clone https://github.com/salesforce/BLIP.git repositories/BLIP
+22git -C repositories/BLIP checkout 48211a1594f1321b00f14c9f7a5b4813144b2fb9
+23pip install -r requirements.txt
+24pip install -r requirements_npu.txt
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/sd_webui/quick_start.html b/sources/sd_webui/quick_start.html new file mode 100644 index 0000000..f118800 --- /dev/null +++ b/sources/sd_webui/quick_start.html @@ -0,0 +1,233 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装指南 准备好昇腾环境及stable-diffusion-webui!

+
+
+

参数说明

+
+

主要参数

+
+../../_images/mainparameters.png +
+
    +
  • Stable Diffusion checkpoint

  • +
+

用于更换模型文件,v1-5-pruned-emaonly.safetensors为stable-diffusion-webui的默认模型文件,更换其他模型文件需自行下载。

+
    +
  • Prompt

  • +
+

正面提示词,是提示词的基础:用关键词直接描述想要生成的图像内容、风格、情感等,使模型在绘图时更倾向于生成与 Prompt 相关的元素。

+
    +
  • Negative Prompt

  • +
+

反向提示词,作用与 Prompt 相反:对不希望出现的元素进行反向加权,降低这些元素出现的频率,从而约束模型的输出。

+
    +
  • Generate

  • +
+

即开始生成图片按钮。

+
+
+

其他参数

+
+../../_images/moreparameters.png +
+
    +
  • Sampling method

  • +
+

即采样方法。各采样方法本身并没有绝对意义上的优劣之分,只有是否适合当前场景之分:

+
+
    +
  • Euler:比较成熟的经典采样方法,效果比较稳定。

  • +
  • LMS:线性多步法(Linear Multi-Step),在 Euler 的基础上利用多步历史信息来提高求解精度。

  • +
  • Heun:在 Euler 方法基础上改进的常微分方程求解方法,精度更高,但每一步需要额外一次计算。

  • +
  • DPM:针对扩散概率模型(Diffusion Probabilistic Model)设计的求解器,通常能以较少的步数获得较好的效果。

  • +
+
+
    +
  • Sampling Steps

  • +
+

即采样步数,并非越大越好,也不是越小越好:步数太少时随机性高、细节不足;步数太多时耗时明显增加,而画质提升有限。

+
    +
  • seed

  • +
+

seed 即随机种子,取 -1 时每次随机生成一个种子。种子决定扩散过程的初始噪声,从而影响画面内容;在其他参数相同的情况下,使用相同的种子能得到几乎一致的结果。

+
    +
  • Width & Height

  • +
+

生成图片的宽和高

+
+
+
+

文生图

+

文生图就是根据文字生成图片,主要操作为点击Stable Diffusion checkpoint选择模型,在Prompt和Negative Prompt填入提示词,点击Generate按钮生成图片。

+

以下是根据提示词生成的图片:

+

Prompt:a cute cat

+

Negative Prompt:deformed, lowres, bad anatomy

+
+../../_images/cat.png +
+
+
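除了在网页界面上操作,stable-diffusion-webui 还提供 HTTP API:以 --api 参数启动(例如 ./webui.sh --listen --api)后,可以通过 /sdapi/v1/txt2img 接口用脚本复现上述文生图流程。下面是一个最小示例草稿(非原文内容,其中的地址、端口与参数取值均为示例假设):
+
import base64
+import requests
+
+# 假设 webui 已以 --api 启动并监听默认端口 7860
+url = "http://127.0.0.1:7860/sdapi/v1/txt2img"
+payload = {
+    "prompt": "a cute cat",
+    "negative_prompt": "deformed, lowres, bad anatomy",
+    "sampler_name": "Euler",  # 对应界面上的 Sampling method
+    "steps": 20,              # 对应 Sampling Steps
+    "seed": -1,               # -1 表示随机种子
+    "width": 512,
+    "height": 512,
+}
+resp = requests.post(url, json=payload, timeout=300)
+resp.raise_for_status()
+
+# 接口返回 base64 编码的图片列表,解码后保存到本地
+with open("txt2img_cat.png", "wb") as f:
+    f.write(base64.b64decode(resp.json()["images"][0]))
+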
+

图生图

+

图生图(img2img)是让AI参照现有的图片生图:

+

如上传一张真人照片,让AI把他改绘成动漫人物;上传画作线稿,让AI自动上色;上传一张黑白照,让AI把它修复成彩色相片。

+

参数和操作与文生图基本相同,这里不再赘述。

+

以下是图片生成的效果:

+

Prompt:a cute cat wear a hat

+

Negative Prompt:deformed, lowres, bad anatomy

+
+../../_images/catwearhat.png +
+
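图生图同样可以通过 /sdapi/v1/img2img 接口以脚本方式调用:与上面的 txt2img 示例相比,主要差别是把原图以 base64 编码放入 init_images,并用 denoising_strength 控制改动幅度。以下为示例草稿(图片路径与参数取值均为假设):
+
import base64
+import requests
+
+# 读取一张本地原图并编码为 base64(路径为示例假设)
+with open("cat.png", "rb") as f:
+    init_image = base64.b64encode(f.read()).decode("utf-8")
+
+payload = {
+    "init_images": [init_image],
+    "prompt": "a cute cat wear a hat",
+    "negative_prompt": "deformed, lowres, bad anatomy",
+    "denoising_strength": 0.6,  # 数值越大,与原图差别越大
+    "steps": 20,
+}
+resp = requests.post("http://127.0.0.1:7860/sdapi/v1/img2img", json=payload, timeout=300)
+resp.raise_for_status()
+with open("img2img_cat_hat.png", "wb") as f:
+    f.write(base64.b64decode(resp.json()["images"][0]))
+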
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/sentence_transformers/index.html b/sources/sentence_transformers/index.html new file mode 100644 index 0000000..728ce1f --- /dev/null +++ b/sources/sentence_transformers/index.html @@ -0,0 +1,156 @@ + + + + + + + + + Sentence Transformers — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/sentence_transformers/install.html b/sources/sentence_transformers/install.html new file mode 100644 index 0000000..78b1972 --- /dev/null +++ b/sources/sentence_transformers/install.html @@ -0,0 +1,175 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 sentence-transformers & 昇腾的开发者,帮助完成昇腾环境下 sentence-transformers 的安装。

+
+

昇腾环境安装

+

请根据已有昇腾产品型号及 CPU 架构等按照 快速安装昇腾环境指引 进行昇腾环境安装。

+
+

警告

+

CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

+
+
+
+

sentence-transformers 下载安装

+
    +
  1. 安装项目所需依赖

  2. +
+
pip install sentence-transformers -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
    +
  1. 安装 torch_npu

  2. +
+
pip install torch==2.1.0 torch_npu==2.1.0.post6 -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+

提示

+

torch_npu 的版本需要匹配 torch 的版本,详细信息请参考:Ascend Extension for PyTorch

+
+
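安装完成后,可以用下面的小脚本确认 torch 与 torch_npu 的版本是否匹配、NPU 设备是否可用(示例草稿,非原文内容):
+
import torch
+import torch_npu
+
+print("torch version:", torch.__version__)
+print("torch_npu version:", torch_npu.__version__)
+print("NPU available:", torch.npu.is_available())
+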
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/sentence_transformers/quick_start.html b/sources/sentence_transformers/quick_start.html new file mode 100644 index 0000000..407d55b --- /dev/null +++ b/sources/sentence_transformers/quick_start.html @@ -0,0 +1,183 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 sentence-transformers !

+
+

本教程以 all-MiniLM-L6-v2 模型为例,讲述如何使用 sentence-transformers 在昇腾 NPU 上实现文本数据的 Embedding。

+
+

前置准备

+

本篇样例代码为 sentence-transformers 的官方样例,需提前进行下载:

+
git clone https://github.com/UKPLab/sentence-transformers.git
+
+
+
+
+

使用模型

+

进入 sentence-transformers 项目目录,依次执行如下命令:

+
cd examples/applications/computing-embeddings
+python computing_embeddings.py
+
+
+

出现如下日志则代表执行成功:

+
2024-10-15 08:11:36 - Use pytorch device_name: npu
+2024-10-15 08:11:36 - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
+[W compiler_depend.ts:623] Warning: expandable_segments currently defaults to false. You can enable this feature by `export PYTORCH_NPU_ALLOC_CONF = expandable_segments:True`. (function operator())
+Batches: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s]
+Sentence: This framework generates embeddings for each input sentence
+Embedding: [-0.01375547 -0.04301599 -0.01562478 ...  0.10029524  0.12379668 -0.04230832]
+
+Sentence: Sentences are passed as a list of string.
+Embedding: [ 0.05640831  0.05488579  0.03137118 ...  0.06652435  0.08493122 -0.03337045]
+
+Sentence: The quick brown fox jumps over the lazy dog.
+Embedding: [0.04393559 0.05903088 0.04824848 ... 0.05215353 0.05615513 0.10205095]
+
+
+

可以看到该模型成功生成了这些句子对应的 Embedding 向量。

+
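如果想在自己的脚本中直接计算 Embedding,核心逻辑大致如下(示例草稿,非官方样例代码;句子沿用上文输出,device="npu" 的写法基于已安装 torch_npu 的假设):
+
import torch
+import torch_npu  # 导入后 PyTorch 才能识别 "npu" 设备
+from sentence_transformers import SentenceTransformer
+
+# 加载 all-MiniLM-L6-v2 模型并指定 NPU 设备
+model = SentenceTransformer("all-MiniLM-L6-v2", device="npu")
+
+sentences = [
+    "This framework generates embeddings for each input sentence",
+    "Sentences are passed as a list of string.",
+]
+embeddings = model.encode(sentences)
+
+for sentence, embedding in zip(sentences, embeddings):
+    print("Sentence:", sentence)
+    print("Embedding shape:", embedding.shape)
+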
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/timm/index.html b/sources/timm/index.html new file mode 100644 index 0000000..6324fc8 --- /dev/null +++ b/sources/timm/index.html @@ -0,0 +1,161 @@ + + + + + + + + + timm — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/sources/timm/install.html b/sources/timm/install.html new file mode 100644 index 0000000..cc9e6b5 --- /dev/null +++ b/sources/timm/install.html @@ -0,0 +1,202 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 pytorch-image-models (timm) & 昇腾的开发者,帮助完成昇腾环境下 timm 的安装。

+
+

昇腾环境安装

+

请根据已有昇腾产品型号及CPU架构等按照 快速安装昇腾环境指引 进行昇腾环境安装。

+
+

警告

+

CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

+
+
+
+

Python 环境创建

+
1# 创建名为 timm 的 python 3.10 的虚拟环境
+2conda create -y -n timm python=3.10
+3# 激活虚拟环境
+4conda activate timm
+
+
+
+
+

timm 安装

+

使用以下指令安装 timm:

+
1pip install timm -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+
+

torch-npu 安装

+

按照 torch-npu 安装指引 安装 2.2.0 版本 torch 和 torch-npu,或使用以下指令快速安装:

+
1# install the dependencies
+2pip3 install attrs numpy==1.26.4 decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions -i https://pypi.tuna.tsinghua.edu.cn/simple
+3# install torch and torch-npu
+4pip install torch==2.2.0 torch-npu==2.2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+
+

安装校验

+

使用以下 Python 脚本对 timm 的安装进行校验,正确打印 timm 的版本号和 NPU 卡号即说明安装成功。

+
1import torch
+2import torch_npu
+3import timm
+4
+5print("timm version:", timm.version.__version__)
+6print("NPU devices:", torch.npu.current_device())
+
+
+

正确回显如下(单卡 NPU 环境):

+
timm version: 1.0.8.dev0
+NPU devices: 0
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/timm/quick_start.html b/sources/timm/quick_start.html new file mode 100644 index 0000000..37dfcbd --- /dev/null +++ b/sources/timm/quick_start.html @@ -0,0 +1,273 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 timm !

+
+

本文档帮助昇腾开发者快速使用 timm × 昇腾 进行训练和推理。

+
+

导入 torch-npu

+

首先在入口脚本(如本文档中的 train.py、validate.py、inference.py)中导入 torch,然后导入 torch_npu:

+
1import torch
+2import torch_npu
+
+
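导入 torch_npu 之后,也可以直接把 timm 模型放到 NPU 上做一次前向计算,用来快速确认环境可用。下面是一个最小示例草稿(模型名与输入尺寸均为示例假设):
+
import torch
+import torch_npu
+import timm
+
+# 创建一个小模型并移动到 NPU(resnet18 仅作示例)
+model = timm.create_model("resnet18", pretrained=False, num_classes=1000).to("npu")
+model.eval()
+
+# 构造随机输入,执行一次前向推理
+x = torch.randn(1, 3, 224, 224).to("npu")
+with torch.no_grad():
+    out = model(x)
+
+print(out.shape)  # 预期输出 torch.Size([1, 1000])
+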
+
+
+

单卡/分布式训练

+

以 ImageNet-1000 数据集的训练为例,使用以下脚本启动单卡/多卡 NPU 上基于 timm 的图像分类模型训练:

+
+

备注

+

请根据您的 NPU 环境指定 NPU 卡数量 num_npus 和模型名称/路径 model ,并替换数据集路径 path/to/dataset/ImageNet-1000

+
+
 1num_npus=1
+ 2./distributed_train.sh $num_npus path/to/dataset/ImageNet-1000 \
+ 3    --device npu \
+ 4    --model seresnet34 \
+ 5    --sched cosine \
+ 6    --epochs 150 \
+ 7    --warmup-epochs 5 \
+ 8    --lr 0.4 \
+ 9    --reprob 0.5 \
+10    --remode pixel \
+11    --batch-size 256 \
+12    --amp -j 4
+
+
+
+
+

模型验证

+
+

备注

+

请根据实际情况替换验证集数据路径 path/to/data 、模型路径 path/to/model

+
+
1python validate.py path/to/data --device npu --model path/to/model --batch-size 64 --pretrained
+
+
+

正常输出验证过程日志及最终验证结果 result 说明验证成功,如下为一种示例(根据模型及数据集不同,日志会有区别):

+
Validating in float32. AMP not enabled.
+Loading pretrained weights from Hugging Face hub (timm/tiny_vit_21m_512.dist_in22k_ft_in1k)
+[timm/tiny_vit_21m_512.dist_in22k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
+Model ./model_ckpts/tiny_vit_21m_512 created, param count: 21268120
+Data processing configuration for current model + dataset:
+        input_size: (3, 512, 512)
+        interpolation: bicubic
+        mean: (0.485, 0.456, 0.406)
+        std: (0.229, 0.224, 0.225)
+        crop_pct: 1.0
+        crop_mode: squash
+Test: [   0/157]  Time: 7.083s (7.083s,    9.04/s)  Loss:  0.4765 (0.4765)  Acc@1:  93.750 ( 93.750)  Acc@5:  96.875 ( 96.875)
+Test: [  10/157]  Time: 0.400s (1.008s,   63.50/s)  Loss:  0.6594 (0.4929)  Acc@1:  78.125 ( 87.926)  Acc@5:  98.438 ( 98.011)
+Test: [  20/157]  Time: 0.399s (0.719s,   89.04/s)  Loss:  0.1891 (0.4682)  Acc@1:  96.875 ( 89.435)  Acc@5: 100.000 ( 98.289)
+
+... ...
+
+* Acc@1 86.040 (13.960) Acc@5 97.750 (2.250)
+--result
+{
+    "model": "./model_ckpts/tiny_vit_21m_512",
+    "top1": 86.04,
+    "top1_err": 13.96,
+    "top5": 97.75,
+    "top5_err": 2.25,
+    "param_count": 21.27,
+    "img_size": 512,
+    "crop_pct": 1.0,
+    "interpolation": "bicubic"
+}
+
+
+
+
+

模型推理

+
+

备注

+

请根据实际情况替换验证集数据路径 path/to/data 和模型权重路径 path/to/checkpoint/model_best.pth.tar

+
+
1python inference.py ../open_clip/data/ImageNet-1000/val/ \
+2    --device npu \
+3    --batch-size 64 \
+4    --model ./model_ckpts/tiny_vit_21m_512 \
+5    --label-type detail \
+6    --topk 5
+
+
+

正常输出推理过程日志及最终推理结果说明推理成功,如下为一种示例(根据模型及数据集不同,日志会有区别):

+
Running inference in float32. AMP not enabled.
+Loading pretrained weights from Hugging Face hub (timm/tiny_vit_21m_512.dist_in22k_ft_in1k)
+[timm/tiny_vit_21m_512.dist_in22k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
+Model ./model_ckpts/tiny_vit_21m_512 created, param count: 21268120
+Predict: [0/157] Time 6.418 (6.418)
+Predict: [10/157] Time 0.394 (0.942)
+Predict: [20/157] Time 0.427 (0.708)
+
+... ...
+
+"ILSVRC2012_val_00005844.JPEG":{
+    "label":[
+        "stinkhorn, carrion fungus: any of various ill-smelling brown-capped fungi of the order Phallales",
+        "earthstar: any fungus of the family Geastraceae; in form suggesting a puffball whose outer peridium splits into the shape of a star",
+        "coral fungus: any of numerous fungi of the family Clavariaceae often brightly colored that grow in often intricately branched clusters like coral",
+        "mushroom: fleshy body of any of numerous edible fungi",
+        "gyromitra: any fungus of the genus Gyromitra"
+    ],
+    "prob":[
+        0.878154695,
+        0.0030552391,
+        0.0012754521,
+        0.0010740706,
+        0.000946458
+    ]
+},
+
+... ...
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/transformers/fine-tune.html b/sources/transformers/fine-tune.html new file mode 100644 index 0000000..f7c20f1 --- /dev/null +++ b/sources/transformers/fine-tune.html @@ -0,0 +1,367 @@ + + + + + + + + + 微调预训练模型 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

微调预训练模型

+
+

备注

+

阅读本篇前,请确保已按照 安装指南 准备好昇腾环境及transformers!

+
+

大模型微调本质是利用特定领域的数据集对已预训练的大模型进行进一步训练的过程。它旨在优化模型在特定任务上的性能,使模型能够更好地适应和完成特定领域的任务。 +本文在使用transformers库选定相关数据集和预训练模型的基础上,通过超参数调优完成对模型的微调。

+
+

前置准备

+
+

安装必要库

+
1pip install transformers datasets evaluate accelerate scikit-learn
+
+
+
+
+

加载数据集

+

模型训练需要使用数据集,这里使用 Yelp Reviews dataset

+
1from datasets import load_dataset
+2
+3# load_dataset 会自动下载数据集并将其保存到本地路径中
+4dataset = load_dataset("yelp_review_full")
+5#输出数据集的第100条数据
+6dataset["train"][100]
+
+
+

输出如下:

+
{'label': 0, 'text': 'My expectations for McDonalds are t rarely high. But for one to still fail so spectacularly...that takes something special!\\n
+The cashier took my friends\'s order, then promptly ignored me. I had to force myself in front of a cashier who opened his register to wait on the
+person BEHIND me. I waited over five minutes for a gigantic order that included precisely one kid\'s meal. After watching two people who ordered after
+me be handed their food, I asked where mine was. The manager started yelling at the cashiers for \\"serving off their orders\\" when they didn\'t have
+their food. But neither cashier was anywhere near those controls, and the manager was the one serving food to customers and clearing the boards.\\nThe
+manager was rude when giving me my order. She didn\'t make sure that I had everything ON MY RECEIPT, and never even had the decency to apologize that
+I felt I was getting poor service.\\nI\'ve eaten at various McDonalds restaurants for over 30 years. I\'ve worked at more than one location. I expect
+bad days, bad moods, and the occasional mistake. But I have yet to have a decent experience at this store. It will remain a place I avoid unless someone
+in my party needs to avoid illness from low blood sugar. Perhaps I should go back to the racially biased service of Steak n Shake instead!'}
+
+
+
+
+

预处理数据集

+

预处理数据集需要使用AutoTokenizer,它用来自动获取与模型匹配的分词器,分词器根据规则将文本拆分为标记,并转换为张量作为模型输入, +下面用到了Meta-Llama-3-8B-Instruct模型,下载模型请转至 模型获取,以下是一个示例:

+
1from transformers import AutoTokenizer
+2
+3tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+4#使用分词器处理文本
+5encoded_input = tokenizer("Do not meddle in the affairs of wizards, for they are subtle and quick to anger.")
+6print(encoded_input)
+
+
+

输出如下:

+
{'input_ids': [128000, 5519, 539, 1812, 91485, 304, 279, 22747, 315, 89263, 11, 369, 814, 527, 27545, 323, 4062, 311, 19788, 13],
+ 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
+
+
+

接着使用dataset.map方法对数据集进行预处理:

+
1def tokenize_function(examples):
+2    return tokenizer(examples["text"], padding="max_length", truncation=True)
+3
+4tokenized_datasets = dataset.map(tokenize_function, batched=True)
+
+
+

初次进行预处理需要一定时间,内容如下:

+
1Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding.
+2Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
+3Map: 100%|████████████████████████████████████████████████████████████████████████| 650000/650000 [03:27<00:00, 3139.47 examples/s]
+4Map: 100%|██████████████████████████████████████████████████████████████████████████| 50000/50000 [00:15<00:00, 3156.92 examples/s]
+
+
+

训练全部的数据集会耗费更长的时间,通常将其划分为较小的训练集和验证集,以提高训练速度:

+
1small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
+2small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
+3
+4# 下面是加载全训练集和验证集
+5# full_train_dataset = tokenized_datasets["train"]
+6# full_eval_dataset = tokenized_datasets["test"]
+
+
+
+
+
+

训练

+
+

加载模型

+

Yelp 评论是一个五分类任务,这里使用 AutoModelForSequenceClassification 自动加载带有分类头的模型,并指定 num_labels=5,以便与后文基于准确率的评估相匹配:

+
1from transformers import AutoModelForSequenceClassification
+2
+3model = AutoModelForSequenceClassification.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", num_labels=5)
+
+
+
+
+

超参数调优

+

训练超参数以及用于激活不同训练选项的标志共同决定了训练的整体行为(例如学习率、训练轮数、评估策略等),下边使用 TrainingArguments 类来加载这些配置:

+
1from transformers import TrainingArguments
+2
+3training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
+
+
+
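除 output_dir 与评估策略外,TrainingArguments 还可以显式设置常见的训练超参数。下面是一个示意(各参数取值仅为示例,请按实际任务与 NPU 显存情况调整):

from transformers import TrainingArguments

# 在 output_dir 与评估策略之外,显式指定若干常用超参数(取值仅为示例)
training_args = TrainingArguments(
    output_dir="test_trainer",
    eval_strategy="epoch",          # 每个 epoch 结束后评估一次
    learning_rate=2e-5,             # 学习率
    per_device_train_batch_size=8,  # 每张 NPU 卡上的训练 batch size
    per_device_eval_batch_size=8,   # 每张 NPU 卡上的评估 batch size
    num_train_epochs=3,             # 训练轮数
    weight_decay=0.01,              # 权重衰减
)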
+
+

模型评估

+

模型评估用于衡量模型在给定数据集上的表现,常用指标包括准确率(accuracy)、完全匹配(exact match)、平均交并比(mean IoU)等,下面是使用方式:

+
 1import numpy as np
+ 2import sklearn
+ 3import evaluate
+ 4
+ 5metric = evaluate.load("accuracy")
+ 6
+ 7#计算预测的准确性,并将预测传递给compute
+ 8def compute_metrics(eval_pred):
+ 9    logits, labels = eval_pred
+10    predictions = np.argmax(logits, axis=-1)
+11    return metric.compute(predictions=predictions, references=labels)
+
+
+
+
+

Trainer

+

使用已加载的模型、训练参数、训练和测试数据集以及评估函数创建一个Trainer对象,并调用trainer.train()来微调模型:

+
 1from transformers import Trainer
+ 2
+ 3trainer = Trainer(
+ 4    model=model,
+ 5    args=training_args,
+ 6    train_dataset=small_train_dataset,
+ 7    eval_dataset=small_eval_dataset,
+ 8    compute_metrics=compute_metrics,
+ 9)
+10
+11trainer.train()
+
+
+
+
+
+

预训练全流程

+
 1import torch
+ 2import torch_npu
+ 3import numpy as np
+ 4import sklearn
+ 5import evaluate
 6from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
+ 7from datasets import load_dataset
+ 8
+ 9model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+10device = "npu:0" if torch.npu.is_available() else "cpu"
+11
+12# 加载分词器和模型
+13tokenizer = AutoTokenizer.from_pretrained(model_id)
+14model = AutoModelForSequenceClassification.from_pretrained(
+15    model_id, num_labels=5,
+16    torch_dtype=torch.bfloat16,
+17    device_map="auto",
+18).to(device)
+19
+20dataset = load_dataset("yelp_review_full")
+21
+22#分词函数
+23def tokenize_function(examples):
+24    return tokenizer(examples["text"], padding="max_length", truncation=True)
+25
+26tokenized_datasets = dataset.map(tokenize_function, batched=True)
+27
+28small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
+29small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
+30
+31# 加载评估指标
+32metric = evaluate.load("accuracy")
+33
+34# 定义评估指标的计算函数
+35def compute_metrics(eval_pred):
+36    logits, labels = eval_pred
+37    predictions = np.argmax(logits, axis=-1)
+38    return metric.compute(predictions=predictions, references=labels)
+39
+40training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
+41
+42trainer = Trainer(
+43    model=model,
+44    args=training_args,
+45    train_dataset=small_train_dataset,
+46    eval_dataset=small_eval_dataset,
+47    compute_metrics=compute_metrics,
+48)
+49
+50trainer.train()
+
+
+

训练完成后得到以下结果:

+
 1|█████████████████████████████████| [375/375 06:21, Epoch 3/3]
+ 2
+ 3=====  =============  ===============  ======
+ 4Epoch  Training Loss  Validation Loss  Accuracy
+ 5=====  =============  ===============  ======
+ 61       No log          1.155628    0.499000
+ 72       No log          0.994618    0.574000
+ 83       No log          1.026123    0.590000
+ 9=====  =============  ===============  ======
+10
+11TrainOutput(global_step=375, training_loss=1.0557311197916666, metrics={'train_runtime': 384.55, 'train_samples_per_second': 7.801,
+12'train_steps_per_second': 0.975, 'total_flos': 789354427392000.0, 'train_loss': 1.0557311197916666, 'epoch': 3.0})
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/transformers/index.html b/sources/transformers/index.html new file mode 100644 index 0000000..2879b50 --- /dev/null +++ b/sources/transformers/index.html @@ -0,0 +1,179 @@ + + + + + + + + + Transformers — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/sources/transformers/inference.html b/sources/transformers/inference.html new file mode 100644 index 0000000..83146de --- /dev/null +++ b/sources/transformers/inference.html @@ -0,0 +1,305 @@ + + + + + + + + + 推理 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

推理

+
+

备注

+

阅读本篇前,请确保已按照 安装指南 准备好昇腾环境及transformers!

+
+

在推理阶段,训练好的模型被用于对图像、语音或文本进行分类,也可以用于语言生成、翻译等。

+

本文的模型推理以 transformers 的 pipeline 为中心进行介绍。pipeline 会自动加载模型以及能够完成任务推理的预处理类,使得针对语言、计算机视觉、语音以及多模态任务的模型推理变得非常简单。

+
+

pipeline 抽象类

+

pipeline 抽象类是所有其他 pipeline 的封装,可以像其他任何 pipeline 一样实例化。

+

pipeline 的参数主要由 task、tokenizer、model 以及其他可选参数组成:

+
    +
  • task 将确定返回哪一个 pipeline,比如 text-classification 将会返回 TextClassificationPipeline,image-to-image 将会返回 ImageToImagePipeline。

  • +
  • tokenizer 是分词器,用于对输入进行编码,可以是 str 或 PreTrainedTokenizer。如果未提供,会按以下顺序回退:当 model 参数为 str 时复用 model;否则当 config 参数为 str 时使用 config;若仍无法确定,则使用该 task 的默认 tokenizer。

  • +
  • model 是模型,可以是 str 或 PreTrainedModel,一般为包含 .bin 等权重文件的目录。

  • +
  • optional 指其他可选参数,包括 config、feature_extractor、device、device_map 等。下面给出一个显式传入这些参数来实例化 pipeline 的示例。

  • +
+
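下面是一个显式指定 task、model、tokenizer 与 device 来实例化 pipeline 的最小示意(仅为草图:模型 distilbert-base-uncased-finetuned-sst-2-english 为假设选用的公开情感分类模型,device 取值请按实际 NPU 环境调整):

import torch
import torch_npu
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# 示例模型:一个公开的英文情感分类模型(可替换为任意本地或 Hub 上的模型)
model_id = "distilbert-base-uncased-finetuned-sst-2-english"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

# task 决定返回 TextClassificationPipeline;device=0 表示使用第 0 张 NPU 卡,无 NPU 时改为 -1 使用 CPU
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.npu.is_available() else -1,
)

print(classifier("This documentation is really helpful!"))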
+
+

pipeline 使用

+

pipeline适用于音频、计算机视觉、自然语言处理和多模态任务,下面将介绍它在各场景的使用方式。

+
+

音频

+
+

音频识别

+

用于提取某些音频中包含的文本,如下创建pipeline,并输入音频文件:

+
1from transformers import pipeline
+2
+3transcriber = pipeline(task="automatic-speech-recognition")
+4transcriber("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")
+5
+6#以下为输出示例
+7{'text': 'I HAVE A DREAM BUT ONE DAY THIS NATION WILL RISE UP LIVE UP THE TRUE MEANING OF ITS TREES'}
+
+
+
+
+

文本转音频

+

根据输入文本和可选的其他条件输入生成音频文件:

+
1from transformers import pipeline
+2
+3pipe = pipeline(model="suno/bark-small")
+4output = pipe("Hey it's HuggingFace on the phone!")
+5
+6audio = output["audio"]
+7sampling_rate = output["sampling_rate"]
+
+
+
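若需要把生成结果保存为音频文件,可以参考下面的写法(仅为示意:假设 output["audio"] 为 numpy 数组,输出文件名 bark_out.wav 为示例):

import numpy as np
import scipy.io.wavfile
from transformers import pipeline

pipe = pipeline(model="suno/bark-small")
output = pipe("Hey it's HuggingFace on the phone!")

# 将生成的音频写入 wav 文件(文件名仅为示例)
audio = np.squeeze(output["audio"])   # 去掉多余的维度
scipy.io.wavfile.write("bark_out.wav", rate=output["sampling_rate"], data=audio)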
+
+
+

计算机视觉

+
+

图像分类

+

图像分类可以识别图片特征,并给出分类标签和置信度得分:

+
1from transformers import pipeline
+2
+3classifier = pipeline(model="microsoft/beit-base-patch16-224-pt22k-ft22k")
+4classifier("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
+5
+6#以下为输出示例
+7[{'score': 0.442, 'label': 'macaw'}, {'score': 0.088, 'label': 'popinjay'}, {'score': 0.075, 'label': 'parrot'}, {'score': 0.073, 'label': 'parodist, lampooner'}, {'score': 0.046, 'label': 'poll, poll_parrot'}]
+
+
+
+
+

图像转图像

+

它可以将图像根据信息生成新图像,以下示例通过图像超分辨率模型将低分辨率图像放大并增强其细节,使其看起来更清晰:

+
 1from PIL import Image
+ 2import requests
+ 3from transformers import pipeline
+ 4
+ 5upscaler = pipeline("image-to-image", model="caidas/swin2SR-classical-sr-x2-64")
+ 6img = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
+ 7img = img.resize((64, 64))
+ 8upscaled_img = upscaler(img) #超分辨率处理
+ 9print(img.size)
+10print(upscaled_img.size)
+11
+12#以下为输出示例
+13(64, 64)    # 输出原图像的尺寸
+14(144, 144)  # 输出处理后图像的尺寸
+
+
+
+
+
+

自然语言处理

+
+

文本分类

+

根据给定的候选标签对文本进行(零样本)分类:

+
1from transformers import pipeline
+2classifier = pipeline(model="facebook/bart-large-mnli")
+3classifier(
+4    "I have a problem with my iphone that needs to be resolved asap!!",
+5    candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
+6)
+7#以下为输出示例
+8#{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]}
+
+
+
+
+

文本生成

+

根据文本生成对话响应:

+
1from transformers import pipeline
+2
+3generator = pipeline(model="HuggingFaceH4/zephyr-7b-beta")
+4# Zephyr-beta is a conversational model, so let's pass it a chat instead of a single string
+5generator([{"role": "user", "content": "What is the capital of France? Answer in one word."}], do_sample=False, max_new_tokens=2)
+6
+7#以下为输出示例
+8[{'generated_text': [{'role': 'user', 'content': 'What is the capital of France? Answer in one word.'}, {'role': 'assistant', 'content': 'Paris'}]}]
+
+
+
+
+
+

多模态

+
+

视觉问答

+

VQA使用图像和关于该图像的问题进行提问,图像可以是URL或图像的本地路径:

+
 1from transformers import pipeline
 2vqa = pipeline(model="impira/layoutlm-document-qa")
+ 3output = vqa(
+ 4    image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png",
+ 5    question="What is the invoice number?",
+ 6)
+ 7output[0]["score"] = round(output[0]["score"], 3)
+ 8
+ 9#以下为输出示例
+10#[{'score': 0.425, 'answer': 'us-001', 'start': 16, 'end': 16}]
+
+
+
+
+

图像转文本

+

用于预测给定图像的主题:

+
1from transformers import pipeline
+2
+3captioner = pipeline(model="ydshieh/vit-gpt2-coco-en")
+4captioner("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
+5
+6#以下为输出示例
+7[{'generated_text': 'two birds are standing next to each other '}]
+
+
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/transformers/install.html b/sources/transformers/install.html new file mode 100644 index 0000000..04df585 --- /dev/null +++ b/sources/transformers/install.html @@ -0,0 +1,226 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本文将介绍如何在昇腾环境下使用 transformers,帮助开发者完成 transformers 的安装。

+
+

备注

+

请确保环境安装了对应的固件和驱动,详情请参考 快速安装昇腾环境

+
+
+

创建虚拟环境

+

首先需要创建并激活 Python 虚拟环境:

+
conda create -n your_env_name python=3.10
+conda activate your_env_name
+
+
+

同时安装依赖库:

+
# install torch
+pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple torch==2.2.0
+
+# install torch-npu
+pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple torch-npu==2.2.0
+
+
+
+
+

安装transformers

+

直接使用pip命令进行安装:

+
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple transformers
+
+
+
+
+

验证安装

+
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
+import torch
+import torch_npu
+
+# 检查 NPU 是否可用
+if torch.npu.is_available():
+    device = torch.device("npu:0")
+    print("NPU is available. Using NPU.")
+else:
+    device = torch.device("cpu")
+    print("NPU is not available. Using CPU.")
+
+model_id = "bert-base-uncased"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForSequenceClassification.from_pretrained(model_id)
+
+model.to(device)
+
+nlp_pipeline = pipeline(
+    "sentiment-analysis",
+    model=model,
+    tokenizer=tokenizer,
+    device=0 if torch.npu.is_available() else -1
+)
+
+#分析句子情感并输出
+result = nlp_pipeline("This is a test sentence.")
+print(result)
+
+
+

如果成功运行并输出下面内容,则安装成功:

+
NPU is available. Using NPU.
+Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+[{'label': 'POSITIVE', 'score': 0.9998704791069031}]
+
+
+
+
+

卸载transformers

+
pip uninstall transformers
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/transformers/modeldownload.html b/sources/transformers/modeldownload.html new file mode 100644 index 0000000..1255afc --- /dev/null +++ b/sources/transformers/modeldownload.html @@ -0,0 +1,260 @@ + + + + + + + + + 模型获取 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

模型获取

+

本文以 Meta-Llama-3-8B-Instruct 模型为例,介绍如何进行模型的获取。该模型目前主要有三种获取方式:Meta 官方、HuggingFace 和 hf-mirror,下面将详细说明这三种获取模型的方法。

+
+

Meta官方

+

下载模型前需要获取licence,前往 Meta官网,提供信息获取到许可证,拿到已签名的URL。

+
    +
  • 链接类似于下面:

  • +
+
1https://download6.llamameta.net/*?Policy=eyJTdGF0ZW1lbnQiOlt7InVuaXF1ZV9oYXNoIjoibGJuYXc0bzdrY2pqNnoxeXZ1N3hmcmNvIiwiUmVzb3VyY2UiOiJodHRwczp
+2cL1wvZG93bmxvYWQ2LmxsYW1hbWV0YS5uZXRcLyoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE3MTY0MzYyMTF9fX1dfQ__&Signature=KTyc
+3LZkPxqMYY0XqW047tNN9IWX%7EOxlQbqCsDqmcX0vE8oia3Qej-x6aGFQSJhkHRULu8Efso5Qde8KRiptK5rGh9oLrtMeAS3SID%7EOyk38o9NNLKxWokA7yQxwvUVRqibVMJyhkE8XE
+4K2HDNftKT9KLaDG8HHFQmGWuhdTJSvCezJIRKWPtzRf0dohepOiOHOcQW%7Ermo7m6iI595PuoX7o3bVYpFYQf1Syrp05XCr9t2-Rzf8xaIYF5-2vFqELFyFyJys%7E5lA4178elcJcU
+5ImSSokn1IJBARAZ0iLaWDFsuTbvDJmz9j-ccHFJzgDPCMLQjHpK6QfCk4TWGmdyXMg__&Key-Pair-Id=K15QRJLYKIFSLZ&Download-Request-ID=1502880093958574
+
+
+
    +
  • 之后获取源码,使用以下命令下载并进入到工作目录:

  • +
+
1git clone https://github.com/meta-llama/llama3.git
+2cd llama3
+
+
+
    +
  • 运行脚本:

  • +
+
1./download.sh
+
+
+

运行时输入上边获取到的URL,即可进行模型的下载。

+
+
+

HuggingFace

+

HuggingFace同样需要获得licence,访问仓库 meta-llama/Meta-Llama-3-8B-Instruct ,接受许可后等待请求获得批准即可。

+

得到权限后,点击"文件和版本"标签,下载原始文件夹的内容或通过以下命令行下载:

+
    +
  • 安装huggingface-hub:

  • +
+
pip install huggingface-hub
+
+
+
    +
  • 下载文件:

  • +
+
huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --include "original/*" --local-dir meta-llama/Meta-Llama-3-8B-Instruct
+
+
+

以上两种方法国内用户可能无法顺利完成,下面推荐更适合 国内用户 的模型获取方式。

+
+
+

hf-mirror

+

hf-mirror是更适合国内用户获取模型的方式,它是HuggingFace平台的镜像网站, 提供了一个备用的域名来访问HuggingFace的资源和功能, +以 Qwen2-7B-Instruct 为例(Meta-Llama-3-8B-Instruct同样需要获取license,不方便国内用户, 这里用Qwen2代替说明), 共有三种方法,下面依次进行介绍。

+
+

直接下载

+

在模型的文件列表中,点击对应文件的下载图标即可下载,如下:

+
+../../_images/downloadmodel.png +
+
+
+

修改镜像源

+
    +
  • 修改环境变量HF_ENDPOINT,该变量会替换huggingface.co域名:

  • +
+
1# 临时生效
+2export HF_ENDPOINT=https://hf-mirror.com
+3# 永久生效
+4echo export HF_ENDPOINT=https://hf-mirror.com >> ~/.bashrc
+
+
+
    +
  • 安装huggingface-hub:

  • +
+
pip install huggingface-hub
+
+
+
    +
  • 下载文件:

  • +
+
1# huggingface_hub下载单个文件
+2from huggingface_hub import hf_hub_download
+3hf_hub_download(repo_id="Qwen/Qwen2-7B-Instruct", filename="config.json", cache_dir="./your/path/Qwen")
+4
+5# huggingface_hub下载整个项目
+6from huggingface_hub import snapshot_download
+7snapshot_download(repo_id="Qwen/Qwen2-7B-Instruct", cache_dir="./your/path/Qwen")
+
+
+
+
+

git lfs

+

使用以下命令下载模型:

+
1# Make sure you have git-lfs installed (https://git-lfs.com)
+2git lfs install
+3
+4git clone https://hf-mirror.com/Qwen/Qwen2-7B-Instruct
+5
+6# If you want to clone without large files - just their pointers
+7# GIT_LFS_SKIP_SMUDGE=1 git clone https://hf-mirror.com/Qwen/Qwen2-7B-Instruct
+
+
+

使用以上任意一种方式即可完成模型的获取,将模型保存在本地路径后可以进行 微调预训练模型推理 等操作。

+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/transformers/quick_start.html b/sources/transformers/quick_start.html new file mode 100644 index 0000000..2d0f424 --- /dev/null +++ b/sources/transformers/quick_start.html @@ -0,0 +1,266 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装指南 准备好昇腾环境及transformers!

+
+

本文以Meta-Llama-3-8B-Instruct模型为例,介绍如何通过transformers使用模型进行推理, +针对模型推理transformers提供了 AutoModelForCausalLMpipeline 两种方式,下面将说明这两种接口的使用方式。

+
+

备注

+

以下模型用到了Meta-Llama-3-8B-Instruct, 具体可以参考 模型获取

+
+
+

AutoModelForCausalLM

+
 1import torch
+ 2import torch_npu
+ 3from transformers import AutoModelForCausalLM, AutoTokenizer
+ 4
+ 5model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+ 6device = "npu:0" if torch.npu.is_available() else "cpu"
+ 7
+ 8tokenizer = AutoTokenizer.from_pretrained(model_id)
+ 9model = AutoModelForCausalLM.from_pretrained(
+10    model_id,
+11    torch_dtype=torch.bfloat16,
+12    device_map="auto",
+13).to(device)
+
+
+
+
+

pipeline

+
 1import transformers
+ 2import torch
+ 3import torch_npu
+ 4
+ 5model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+ 6device = "npu:0" if torch.npu.is_available() else "cpu"
+ 7
+ 8pipeline = transformers.pipeline(
+ 9    "text-generation",
+10    model=model_id,
+11    model_kwargs={"torch_dtype": torch.bfloat16},
+12    device=device,
+13)
+
+
+
+
+

全流程

+
 1from transformers import AutoModelForCausalLM, AutoTokenizer
+ 2import torch
+ 3import torch_npu
+ 4
+ 5#如果提前下载好模型将meta-llama/Meta-Llama-3-8B-Instruct更换为本地地址
+ 6model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+ 7device = "npu:0"  if torch.npu.is_available() else "cpu" # 指定使用的设备为 NPU 0
+ 8
+ 9# 加载预训练的分词器
+10tokenizer = AutoTokenizer.from_pretrained(model_id)
+11
+12# 加载预训练的语言模型, 并指定数据类型为bfloat16, 自动选择设备映射
+13model = AutoModelForCausalLM.from_pretrained(
+14    model_id,
+15    torch_dtype=torch.bfloat16,
+16    device_map="auto",
+17).to(device) # 将模型移动到指定的设备
+18
+19# 定义消息列表,包含系统消息和用户消息
+20messages = [
+21    {"role": "system", "content": "You are a housekeeper chatbot who always responds in polite expression!"},
+22    {"role": "user", "content": "Who are you? what should you do?"},
+23]
+24
+25# 使用分词器将消息列表应用到聊天模板中,并转换为张量
+26input_ids = tokenizer.apply_chat_template(
+27    messages,
+28    add_generation_prompt=True,
+29    return_tensors="pt" # 返回 PyTorch 张量
+30).to(model.device)
+31
+32
+33# 定义终止标记,包括模型的结束标记 ID 和一个空标记 ID
+34terminators = [
+35    tokenizer.eos_token_id,
+36    tokenizer.convert_tokens_to_ids("<|eot_id|>")
+37]
+38
+39# 生成响应
+40outputs = model.generate(
+41    input_ids,
+42    max_new_tokens=256, # 设置生成的最大token
+43    eos_token_id=terminators,
+44    do_sample=True,
+45    temperature=0.6, # 设置采样温度,影响生成的多样性
+46    top_p=0.9,
+47)
+48
+49# 获取生成的响应,排除输入的部分
+50response = outputs[0][input_ids.shape[-1]:]
+51print(tokenizer.decode(response, skip_special_tokens=True))
+
+
+

输出示例:

+
 1Good day to you! My name is Housekeeper Helen, and I'm delighted to introduce myself as a friendly and efficient chatbot designed to assist with household tasks and provide helpful information.
+ 2As a housekeeper, my primary role is to ensure your home is tidy, organized, and comfortable. I'd be happy to help with:
+ 3
+ 4* Cleaning and organization tips
+ 5* Household chore schedules
+ 6* Laundry and ironing guidance
+ 7* Home maintenance advice
+ 8* And any other domestic-related queries you may have!
+ 9
+10Please feel free to ask me any questions or request my assistance with a specific task. I'm here to help make your life easier and your home sparkle!
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/trl/index.html b/sources/trl/index.html new file mode 100644 index 0000000..93fe24b --- /dev/null +++ b/sources/trl/index.html @@ -0,0 +1,156 @@ + + + + + + + + + Transformer Reinforcement Learning — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Transformer Reinforcement Learning

+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/trl/install.html b/sources/trl/install.html new file mode 100644 index 0000000..f9e2f66 --- /dev/null +++ b/sources/trl/install.html @@ -0,0 +1,180 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+ +
+
+ +
+

安装指南

+

本教程面向使用 TRL (Transformer Reinforcement Learning) & 昇腾的开发者,帮助完成昇腾环境下 TRL 的安装。

+
+

昇腾环境安装

+

请根据已有昇腾产品型号及 CPU 架构等按照 快速安装昇腾环境指引 进行昇腾环境安装。

+
+

警告

+

CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

+
+
+
+

TRL 下载安装

+
    +
  1. 安装项目所需依赖

  2. +
+
pip install trl -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+

另外,本项目需要手动安装 transformers 仓库的最新 main 分支,否则可能会出现如下错误:

+../../_images/image.png +
pip install git+https://github.com/huggingface/transformers.git
+
+
+
    +
  2. 安装 torch_npu

  2. +
+
pip install torch==2.1.0 torch_npu==2.1.0.post6 -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+
+
+

提示

+

torch_npu 的版本需要匹配 torch 的版本,详细信息请参考:Ascend Extension for PyTorch

+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/trl/quick_start.html b/sources/trl/quick_start.html new file mode 100644 index 0000000..7263704 --- /dev/null +++ b/sources/trl/quick_start.html @@ -0,0 +1,187 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+ +
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 TRL (Transformer Reinforcement Learning) !

+
+

本教程以 DPO 方法为例,讲述如何使用 TRL 在昇腾 NPU 上进行模型的后训练。

+
+
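本教程后文运行的官方样例脚本内部即基于 trl 的 DPOTrainer。下面给出一个 DPOTrainer 的最小使用示意(仅为草图:假设使用提供 DPOConfig 的较新 trl 版本,参数名可能随版本变化;示例模型 Qwen/Qwen2-0.5B-Instruct 与偏好数据均为假设,实际训练请以官方脚本为准):

import torch
import torch_npu
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

# 示例模型与数据仅用于说明;实际训练请替换为真实的偏好数据集
model_id = "Qwen/Qwen2-0.5B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# DPO 的训练数据由 prompt / chosen(较优回答)/ rejected(较差回答)三列组成
train_dataset = Dataset.from_dict({
    "prompt":   ["What is the capital of France?"],
    "chosen":   ["The capital of France is Paris."],
    "rejected": ["I am not sure."],
})

training_args = DPOConfig(output_dir="dpo_output", per_device_train_batch_size=1)

trainer = DPOTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,  # 较新版本中该参数可能改名为 processing_class
)
trainer.train()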

前置准备

+

本篇样例代码为 TRL 官方样例,需提前进行下载:

+
git clone https://github.com/huggingface/trl.git
+
+
+
+
+

模型训练

+

进入 TRL 项目目录,依次执行如下命令:

+
cd examples/scripts
+python dpo.py
+
+
+

出现如下日志则代表训练成功:

+
Tokenizing train dataset: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 62135/62135 [07:11<00:00, 143.85 examples/s]
+Tokenizing eval dataset: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 144.73 examples/s]
+Detected kernel version 4.19.90, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+  0%| ... | 0/3883 [00:00<?, ?it/s]/home/sss/github/trl/trl/trainer/dpo_trainer.py:1355: UserWarning: AutoNonVariableTypeMode is deprecated and will be removed in 1.10 release. For kernel implementations please use AutoDispatchBelowADInplaceOrView instead, If you are looking for a user facing API to enable running your inference-only workload, please use c10::InferenceMode. Using AutoDispatchBelowADInplaceOrView in user code is under risk of producing silent wrong result in some edge cases. See Note [AutoDispatchBelowAutograd] for more details. (Triggered internally at build/CMakeFiles/torch_npu.dir/compiler_depend.ts:74.)
+  labels[labels == label_pad_token_id] = 0
+Could not estimate the number of tokens of the input, floating-point operations will not be computed
+{'loss': 0.6598, 'grad_norm': 84.0019760131836, 'learning_rate': 4.967808395570435e-07, 'rewards/chosen': -0.15474730730056763, 'rewards/rejected': -0.24127893149852753, 'rewards/accuracies': 0.5799999833106995, 'rewards/margins': 0.0865316167473793, 'logps/rejected': -283.3350830078125, 'logps/chosen': -322.8130187988281, 'logits/rejected': -2.3705289363861084, 'logits/chosen': -2.455843925476074, 'epoch': 0.01}
+{'loss': 0.635, 'grad_norm': 88.99441528320312, 'learning_rate': 4.93561679114087e-07, 'rewards/chosen': -0.4325330853462219, 'rewards/rejected': -0.6220334768295288, 'rewards/accuracies': 0.625, 'rewards/margins': 0.1895003467798233, 'logps/rejected': -298.9117431640625, 'logps/chosen': -323.9031982421875, 'logits/rejected': -2.384589433670044, 'logits/chosen': -2.4548392295837402, 'epoch': 0.01}
+{'eval_loss': 0.6398493647575378, 'eval_runtime': 66.0493, 'eval_samples_per_second': 15.14, 'eval_steps_per_second': 1.893, 'eval_rewards/chosen': -0.5118070840835571, 'eval_rewards/rejected': -0.6984029412269592, 'eval_rewards/accuracies': 0.6269999742507935, 'eval_rewards/margins': 0.18659590184688568, 'eval_logps/rejected': -314.8978271484375, 'eval_logps/chosen': -344.0274658203125, 'eval_logits/rejected': -2.242685556411743, 'eval_logits/chosen': -2.3058021068573, 'epoch': 0.01}
+{'loss': 0.6284, 'grad_norm': 76.736572265625, 'learning_rate': 4.903425186711305e-07, 'rewards/chosen': -0.5593773722648621, 'rewards/rejected': -0.7793089151382446, 'rewards/accuracies': 0.6575000286102295, 'rewards/margins': 0.21993154287338257, 'logps/rejected': -305.3935241699219, 'logps/chosen': -327.55230712890625, 'logits/rejected': -2.3826913833618164, 'logits/chosen': -2.4632484912872314, 'epoch': 0.02}
+{'loss': 0.6126, 'grad_norm': 84.96641540527344, 'learning_rate': 4.87123358228174e-07, 'rewards/chosen': -0.5430492162704468, 'rewards/rejected': -0.8355176448822021, 'rewards/accuracies': 0.6700000166893005, 'rewards/margins': 0.29246845841407776, 'logps/rejected': -327.23089599609375, 'logps/chosen': -353.1753845214844, 'logits/rejected': -2.42580509185791, 'logits/chosen': -2.513734817504883, 'epoch': 0.03}
+{'eval_loss': 0.6248273253440857, 'eval_runtime': 66.049, 'eval_samples_per_second': 15.14, 'eval_steps_per_second': 1.893, 'eval_rewards/chosen': -0.4807929992675781, 'eval_rewards/rejected': -0.7240013480186462, 'eval_rewards/accuracies': 0.6439999938011169, 'eval_rewards/margins': 0.2432083934545517, 'eval_logps/rejected': -315.15380859375, 'eval_logps/chosen': -343.7173767089844, 'eval_logits/rejected': -2.2496635913848877, 'eval_logits/chosen': -2.3131723403930664, 'epoch': 0.03}
+{'loss': 0.6089, 'grad_norm': 78.2168960571289, 'learning_rate': 4.839041977852176e-07, 'rewards/chosen': -0.4145514667034149, 'rewards/rejected': -0.7176669239997864, 'rewards/accuracies': 0.6600000262260437, 'rewards/margins': 0.3031154274940491, 'logps/rejected': -332.7577209472656, 'logps/chosen': -364.76898193359375, 'logits/rejected': -2.436467409133911, 'logits/chosen': -2.530369997024536, 'epoch': 0.03}
+{'loss': 0.5769, 'grad_norm': 75.62930297851562, 'learning_rate': 4.806850373422611e-07, 'rewards/chosen': -0.6155031323432922, 'rewards/rejected': -1.012223720550537, 'rewards/accuracies': 0.7300000190734863, 'rewards/margins': 0.3967204988002777, 'logps/rejected': -291.11419677734375, 'logps/chosen': -331.4106750488281, 'logits/rejected': -2.378931999206543, 'logits/chosen': -2.4724509716033936, 'epoch': 0.04}
+{'eval_loss': 0.6191915273666382, 'eval_runtime': 66.0551, 'eval_samples_per_second': 15.139, 'eval_steps_per_second': 1.892, 'eval_rewards/chosen': -0.7790046334266663, 'eval_rewards/rejected': -1.1102681159973145, 'eval_rewards/accuracies': 0.6460000276565552, 'eval_rewards/margins': 0.33126339316368103, 'eval_logps/rejected': -319.0165100097656, 'eval_logps/chosen': -346.699462890625, 'eval_logits/rejected': -2.2563016414642334, 'eval_logits/chosen': -2.3217742443084717, 'epoch': 0.04}
+{'loss': 0.6159, 'grad_norm': 80.91998291015625, 'learning_rate': 4.774658768993046e-07, 'rewards/chosen': -0.7987264394760132, 'rewards/rejected': -1.1548289060592651, 'rewards/accuracies': 0.6225000023841858, 'rewards/margins': 0.3561025857925415, 'logps/rejected': -307.766357421875, 'logps/chosen': -319.15777587890625, 'logits/rejected': -2.369903326034546, 'logits/chosen': -2.430453300476074, 'epoch': 0.05}
+{'loss': 0.5967, 'grad_norm': 87.26203155517578, 'learning_rate': 4.7424671645634816e-07, 'rewards/chosen': -0.6392844319343567, 'rewards/rejected': -1.015390396118164, 'rewards/accuracies': 0.699999988079071, 'rewards/margins': 0.3761059045791626, 'logps/rejected': -313.5637512207031, 'logps/chosen': -319.4141845703125, 'logits/rejected': -2.3648269176483154, 'logits/chosen': -2.4581611156463623, 'epoch': 0.05}
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/wenet/index.html b/sources/wenet/index.html new file mode 100644 index 0000000..b006dd8 --- /dev/null +++ b/sources/wenet/index.html @@ -0,0 +1,164 @@ + + + + + + + + + WeNet — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/sources/wenet/install.html b/sources/wenet/install.html new file mode 100644 index 0000000..e320a09 --- /dev/null +++ b/sources/wenet/install.html @@ -0,0 +1,226 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 WeNet & 昇腾的开发者,帮助完成昇腾环境下 WeNet 的安装。

+
+

昇腾环境安装

+

请根据已有昇腾产品型号及CPU架构等按照 快速安装昇腾环境指引 进行昇腾环境安装。

+
+

警告

+

CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

+
+
+
+

Python 环境创建

+
1# 创建名为 wenet 的 python 3.10 的虚拟环境
+2conda create -y -n wenet python=3.10
+3# 激活虚拟环境
+4conda activate wenet
+
+
+
+
+

WeNet 安装

+

首先获取 WeNet 源码并进入仓库根目录,然后使用以下指令安装带有 torch-npu 的 WeNet 及训练相关依赖:

+
1# 获取 WeNet 源码并进入仓库根目录
+2git clone https://github.com/wenet-e2e/wenet.git
+3cd wenet
+4
+5# 安装带有 torch-npu 的 WeNet
+6pip install -e .[torch-npu]
+7
+8# 安装 WeNet 训练相关依赖
+9pip install -r requirements.txt
+
+
+

请遵循以下 torch-npu 相关库的版本控制:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Requirement

Minimum

Recommend

CANN

8.0.RC2.alpha003

latest

torch

2.1.0

2.2.0

torch-npu

2.1.0

2.2.0

torchaudio

2.1.0

2.2.0

deepspeed

0.13.2

latest

+
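可以使用如下 Python 片段快速查看当前环境中各相关库已安装的版本,以便与上表对照(仅为示意;CANN 版本请通过其安装目录或 npu-smi 等工具另行确认):

from importlib.metadata import version, PackageNotFoundError

# 与上表对应的 pip 包名
for pkg in ("torch", "torch-npu", "torchaudio", "deepspeed"):
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")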
+
+

安装校验

+

使用以下 Python 脚本对 torch-npu 环境进行校验,正确打印 NPU 是否可用及当前 NPU 卡号,说明 WeNet 训练所需的昇腾环境安装成功。

+
1import torch
+2import torch_npu
+3
+4print("NPU available:", torch.npu.is_available())
+5print("NPU devices:", torch.npu.current_device())
+
+
+

正确回显如下(单卡 NPU 环境):

+
NPU available: True
+NPU devices: 0
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/wenet/quick_start.html b/sources/wenet/quick_start.html new file mode 100644 index 0000000..380194a --- /dev/null +++ b/sources/wenet/quick_start.html @@ -0,0 +1,244 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 WeNet !

+
+

本文档帮助昇腾开发者快速使用 WeNet × 昇腾 进行自动语音识别(Automatic Speech Recognition, ASR)模型的训练、推理和评估等。

+

WeNet 提供了多种数据集及模型的实验脚本,该脚本将实验分为几个阶段,包含数据集的下载、模型的训练、推理、评估等,均存放在 examples 路径下, +本篇以 aishell-1 数据集的实验为例,基于 WeNet 官方教程 , +详述如何使用 NPU 实验脚本 进行从零开始的语音模型训练。

+

首先进入该脚本所在目录下:

+
1cd examples/aishell/s0
+
+
+
+

下载数据

+

stage -1 阶段将 aishell-1 数据下载到本地路径 $data

+
1bash run_npu.sh --stage -1 --stop_stage -1
+
+
+

如果已下载数据,请更改 run_npu.sh 脚本中的变量 $data 值为实际数据集存放的绝对路径,并从下一阶段开始。

+
+
+

准备训练数据

+

stage 0 阶段为训练数据准备阶段,将使用 local/aishell_data_prep.sh 脚本将训练数据重新组织为 wav.scptext 两部分。

+
+

备注

+

wav.scp 每行记录两个制表符分隔的列: wav_idwav_path, +text 每行记录两个制表符分隔的列: wav_idtext_label

+
+
1bash run_npu.sh --stage 0 --stop_stage 0
+
+
+
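如下是一个读取 wav.scp 与 text 并按 wav_id 对齐的简单示意(文件路径与内容均为假设,仅用于说明两列格式):

# 读取 stage 0 生成的 wav.scp 与 text,两者均为 "wav_id<分隔符>内容" 的两列格式
def read_two_column(path):
    table = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            # 按第一个空白符切分,兼容制表符或空格分隔
            key, value = line.strip().split(maxsplit=1)
            table[key] = value
    return table

wav_scp = read_two_column("data/train/wav.scp")   # wav_id -> wav_path(路径为假设)
text = read_two_column("data/train/text")         # wav_id -> text_label

for wav_id in list(wav_scp)[:3]:
    print(wav_id, wav_scp[wav_id], text.get(wav_id))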
+
+

提取最佳 cmvn 特征(可选)

+

stage 1 阶段从训练数据中提取 cmvn 特征,本阶段为可选阶段,设置 cmvn=false 可跳过本阶段。

+
1bash run_npu.sh --stage 1 --stop_stage 1
+
+
+

tools/compute_cmvn_stats.py 用于提取全局 cmvn(倒谱均值和方差归一化)统计数据,用来归一化声学特征。

+
+
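CMVN 的原理可以用下面的 numpy 片段示意:先对全部训练特征统计全局均值和方差,再对每帧特征做归一化(仅为原理示意,数据为随机生成,实际请使用 tools/compute_cmvn_stats.py 的输出):

import numpy as np

# feats: 假设为 [帧数, 特征维度] 的声学特征矩阵(这里用随机数据代替)
feats = np.random.randn(1000, 80).astype(np.float32)

# 全局 CMVN:统计所有帧的均值与标准差
mean = feats.mean(axis=0)
std = feats.std(axis=0) + 1e-8   # 防止除零

# 归一化后每一维特征近似零均值、单位方差
normalized = (feats - mean) / std
print(normalized.mean(axis=0)[:3], normalized.std(axis=0)[:3])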
+

生成 token 字典

+

stage 2 阶段生成训练所需 token 字典,用于 CTC 解码阶段查询,将输出转换为文字。

+
1bash run_npu.sh --stage 2 --stop_stage 2
+
+
+
+
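token 字典通常每行形如 "<token> <id>"。下面的片段示意了如何加载字典,并把一串 CTC 解码得到的 id 还原为文字(字典路径、行格式与 id 序列均为假设,请以实际生成的文件为准):

# 加载 token 字典:假设每行为 "<token> <id>",以空白符分隔
id2token = {}
with open("data/dict/lang_char.txt", encoding="utf-8") as f:
    for line in f:
        token, idx = line.strip().split()
        id2token[int(idx)] = token

# 将一串解码得到的 token id 转换为文字(id 序列仅为示例)
decoded_ids = [23, 108, 4, 56]
print("".join(id2token.get(i, "<unk>") for i in decoded_ids))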
+

准备 WeNet 数据格式

+

stage 3 阶段生成 WeNet 所需格式的文件 data.list

+
1bash run_npu.sh --stage 3 --stop_stage 3
+
+
+

生成的 data.list 每一行都是 json 格式,包含关键词 key(文件名称)、语音文件地址 wav 和对应文本内容 txt 三个关键数据。如下为一示例:

+
{"key": "BAC009S0002W0122", "wav": "/export/data/asr-data/OpenSLR/33//data_aishell/wav/train/S0002/BAC009S0002W0122.wav", "txt": "而对楼市成交抑制作用最大的限购"}
+
+
+
+
+

模型训练

+

stage 4 为模型训练阶段, run_npu.sh 脚本中实现了 NPU 卡号的自动获取和相关环境变量设置,因此可直接通过以下命令启动昇腾 NPU 上的模型训练:

+
1bash run_npu.sh --stage 4 --stop_stage 4
+
+
+

如需自行指定 NPU 卡号,请更改 run_npu.sh 脚本中的变量 ASCEND_RT_VISIBLE_DEVICES 值为指定卡号。

+
+

备注

+

有关断点重训,参数配置等,请参考 WeNet 官方文档

+
+
+
+

测试推理

+

stage 5 为模型测试推理阶段,将测试集中语音文件识别为文本:

+
1bash run_npu.sh --stage 5 --stop_stage 5
+
+
+

此外,stage 5 还提供平均模型的功能:当 ${average_checkpoint} 为 true 时,将交叉验证集上最佳的 ${average_num} 个模型取平均,生成增强模型。

+
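模型平均的做法可以用下面的 PyTorch 片段示意:逐参数对若干个 checkpoint 的权重取平均后另存为新模型(checkpoint 文件名均为假设,实际请使用 WeNet 自带的平均脚本):

import torch

# 待平均的若干个 checkpoint(文件名仅为示例,假设保存的是 state_dict)
ckpt_paths = ["exp/epoch_38.pt", "exp/epoch_39.pt", "exp/epoch_40.pt"]

avg_state = None
for path in ckpt_paths:
    state = torch.load(path, map_location="cpu")
    if avg_state is None:
        avg_state = {k: v.clone().float() for k, v in state.items()}
    else:
        for k in avg_state:
            avg_state[k] += state[k].float()

# 逐参数求平均并保存为新的权重文件
for k in avg_state:
    avg_state[k] /= len(ckpt_paths)
torch.save(avg_state, "exp/avg_3.pt")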
+

备注

+

此阶段还提供解码和 WER 模型评估等功能,详细信息请参考 WeNet 官方文档

+
+
+
+

导出训练好的模型

+

stage 6 为模型导出阶段, wenet/bin/export_jit.py 使用 Libtorch 导出以上训练好的模型,导出的模型可用于其他编程语言(如 C++)的推理。

+
1bash run_npu.sh --stage 6 --stop_stage 6
+
+
+
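导出的 Libtorch(TorchScript)模型也可以先在 Python 中用 torch.jit.load 加载验证(导出文件路径 exp/your_model/final.zip 为假设,请以脚本实际输出为准):

import torch

# 加载 stage 6 导出的 TorchScript 模型(文件名为假设)
scripted_model = torch.jit.load("exp/your_model/final.zip", map_location="cpu")
scripted_model.eval()
print(scripted_model)  # 打印模型结构,确认导出成功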
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/whisper_cpp/index.html b/sources/whisper_cpp/index.html new file mode 100644 index 0000000..dc22901 --- /dev/null +++ b/sources/whisper_cpp/index.html @@ -0,0 +1,158 @@ + + + + + + + + + Whisper.cpp — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/whisper_cpp/install.html b/sources/whisper_cpp/install.html new file mode 100644 index 0000000..ff37954 --- /dev/null +++ b/sources/whisper_cpp/install.html @@ -0,0 +1,191 @@ + + + + + + + + + 安装指南 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

安装指南

+

本教程面向使用 Whisper.cpp & 昇腾的开发者,帮助完成昇腾环境下 Whisper.cpp 的安装。

+
+

昇腾环境安装

+

请根据已有昇腾产品型号及CPU架构等按照 快速安装昇腾环境指引 进行昇腾环境安装。

+
+

警告

+

CANN 最低版本为 8.0.rc1,安装 CANN 时,请同时安装 Kernel 算子包。

+
+
+
+

Whisper.cpp 编译安装

+
    +
  1. 下载 Whisper.cpp 项目到本地

  2. +
+
1git clone https://github.com/ggerganov/whisper.cpp.git
+
+
+
    +
  2. 在 Whisper.cpp 项目目录下,创建构建目录并进入该目录

  2. +
+
1mkdir build
+2cd build
+
+
+
    +
  3. 编译安装 CANN 版本的 Whisper.cpp

  2. +
+
1cmake .. -D GGML_CANN=on
+2make -j
+
+
+
+
+

安装校验

+

编译完毕后,无任何报错信息,并输出以下关键回显即说明安装成功:

+
[ 90%] Built target quantize
+[ 95%] Linking CXX executable ../../bin/main
+[ 95%] Built target main
+[100%] Linking CXX executable ../../bin/server
+[100%] Built target server
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/sources/whisper_cpp/quick_start.html b/sources/whisper_cpp/quick_start.html new file mode 100644 index 0000000..3994b67 --- /dev/null +++ b/sources/whisper_cpp/quick_start.html @@ -0,0 +1,283 @@ + + + + + + + + + 快速开始 — 昇腾开源 1.0 文档 + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

快速开始

+
+

备注

+

阅读本篇前,请确保已按照 安装教程 准备好昇腾环境及 Whisper.cpp !

+
+

本文档帮助昇腾开发者快速使用 Whisper.cpp × 昇腾 进行自动语音识别(Automatic Speech Recognition, ASR)。

+
+

Whisper 模型下载

+

Whisper 模型是 OpenAI 训练并开源的 ASR 神经网络模型,是当前 ASR 领域主流模型之一。 +在 Whisper.cpp 中进行语音识别,需要下载 Whisper 模型并加载其 gguf 格式权重文件。 +本文提供三种模型的获取方式,请根据需要选择一种即可。

+
+

备注

+

gguf 是一种用于存储神经网络权重的二进制文件格式,旨在快速地加载和保存模型,详见 ggml 官方文档 。

+
+
+

1. 使用脚本下载

+

使用 Whisper.cpp 项目中的 download-ggml-model.sh 脚本下载预先转换为 gguf 格式的 Whisper 模型:

+
1./download-ggml-model.sh base.en
+
+
+

其中 base.en 可替换为所需 Whisper 模型名称,Whisper 模型名称清单:

+
 1# Whisper models
+ 2models="tiny
+ 3        tiny.en
+ 4        tiny-q5_1
+ 5        tiny.en-q5_1
+ 6        base
+ 7        base.en
+ 8        base-q5_1
+ 9        base.en-q5_1
+10        small
+11        small.en
+12        small.en-tdrz
+13        small-q5_1
+14        small.en-q5_1
+15        medium
+16        medium.en
+17        medium-q5_0
+18        medium.en-q5_0
+19        large-v1
+20        large-v2
+21        large-v2-q5_0
+22        large-v3
+23        large-v3-q5_0"
+
+
+
+
+

2. 手动下载

+

预先转换为 gguf 格式的 Whisper 模型可由此处下载:

+ +
+
+

3. 自行转换模型

+

OpenAI 提供的模型 中选择一个下载,使用以下指令完成其到 gguf 模型的转换,并将其移动至 ./models/ 目录下:

+
1python models/convert-pt-to-ggml.py ~/.cache/whisper/medium.pt ~/path/to/repo/whisper/ ./models/whisper-medium
+2mv ./models/whisper-medium/ggml-model.bin models/ggml-medium.bin
+
+
+
+
+
+

语音文件预处理

+

使用 ffmpeg 转换所需处理的语音文件为 16 bit wav 语音文件,此处以 samples/gb0.ogg 为例:

+
1ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
+
+
+
+
+

自动语音识别

+

使用以下指令,即可完成在昇腾 NPU 上的 Whisper.cpp 自动语音识别:

+
1./build/bin/main -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
+
+
+

输出语音识别结果与对应语音内容一致表明识别正确,以下为 samples/jfk.wav 语音的正确回显示例:

+
whisper_init_from_file_with_params_no_state: loading model from 'models/ggml-base.en.bin'
+whisper_init_with_params_no_state: use gpu    = 1
+whisper_init_with_params_no_state: flash attn = 0
+whisper_init_with_params_no_state: gpu_device = 0
+whisper_init_with_params_no_state: dtw        = 0
+whisper_model_load: loading model
+whisper_model_load: n_vocab       = 51864
+whisper_model_load: n_audio_ctx   = 1500
+whisper_model_load: n_audio_state = 512
+whisper_model_load: n_audio_head  = 8
+whisper_model_load: n_audio_layer = 6
+whisper_model_load: n_text_ctx    = 448
+whisper_model_load: n_text_state  = 512
+whisper_model_load: n_text_head   = 8
+whisper_model_load: n_text_layer  = 6
+whisper_model_load: n_mels        = 80
+whisper_model_load: ftype         = 1
+whisper_model_load: qntvr         = 0
+whisper_model_load: type          = 2 (base)
+whisper_model_load: adding 1607 extra tokens
+whisper_model_load: n_langs       = 99
+whisper_model_load:      CPU total size =   147.37 MB
+whisper_model_load: model size    =  147.37 MB
+whisper_backend_init_gpu: using CANN backend
+whisper_init_state: kv self size  =   18.87 MB
+whisper_init_state: kv cross size =   18.87 MB
+whisper_init_state: kv pad  size  =    3.15 MB
+whisper_init_state: compute buffer (conv)   =   16.75 MB
+whisper_init_state: compute buffer (encode) =  131.94 MB
+whisper_init_state: compute buffer (cross)  =    5.17 MB
+whisper_init_state: compute buffer (decode) =  153.13 MB
+
+system_info: n_threads = 8 / 192 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | METAL = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | CUDA = 0 | COREML = 0 | OPENVINO = 0 | CANN = 1
+
+main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 8 threads, 1 processors, 5 beams + best of 5, lang = en, task = transcribe, timestamps = 1 ...
+
+
+[00:00:00.000 --> 00:00:11.000]   And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
+
+
+whisper_print_timings:     load time =   223.83 ms
+whisper_print_timings:     fallbacks =   0 p /   0 h
+whisper_print_timings:      mel time =    19.95 ms
+whisper_print_timings:   sample time =    94.43 ms /   131 runs (    0.72 ms per run)
+whisper_print_timings:   encode time =   632.05 ms /     1 runs (  632.05 ms per run)
+whisper_print_timings:   decode time =    56.30 ms /     2 runs (   28.15 ms per run)
+whisper_print_timings:   batchd time =   930.68 ms /   125 runs (    7.45 ms per run)
+whisper_print_timings:   prompt time =     0.00 ms /     1 runs (    0.00 ms per run)
+whisper_print_timings:    total time =  2854.32 ms
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file