diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..3b3a6a0 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,21 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.8" + +sphinx: + configuration: conf.py + +# If using Sphinx, optionally build your docs in additional formats such as PDF +formats: + - pdf + +# Optionally declare the Python requirements required to build your docs +python: + install: + - requirements: requirements.txt diff --git a/index.rst b/index.rst index d4855f9..c8deee3 100644 --- a/index.rst +++ b/index.rst @@ -20,6 +20,9 @@ sources/pytorch/index.rst sources/llamafactory/index.rst + sources/accelerate/index.rst + sources/transformers/index.rst + sources/onnxruntime/index.rst sources/open_clip/index.rst sources/timm/index.rst @@ -82,11 +85,11 @@
- 官方链接 + 官方链接 | - 安装指南 + 安装指南 | - 快速上手 + 快速上手
@@ -137,7 +140,7 @@
- 官方链接 + 官方链接 | 安装指南 | @@ -156,11 +159,11 @@
- 官方链接 + 官方链接 | - 安装指南 + 安装指南 | - 快速上手 + 快速上手
@@ -187,16 +190,16 @@

Accelerate

-

图像和音频生成等扩散模型工具链

+

适用于PyTorch的多GPU训练工具链

- 官方链接 + 官方链接 | - 安装指南 + 安装指南 | - 快速上手 + 快速上手
diff --git a/sources/accelerate/index.rst b/sources/accelerate/index.rst new file mode 100644 index 0000000..a7d82a9 --- /dev/null +++ b/sources/accelerate/index.rst @@ -0,0 +1,8 @@ +Accelerate +============== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst \ No newline at end of file diff --git a/sources/accelerate/install.rst b/sources/accelerate/install.rst new file mode 100644 index 0000000..4c35f45 --- /dev/null +++ b/sources/accelerate/install.rst @@ -0,0 +1,28 @@ +安装指南 +============== + +本教程面向使用 Accelerate & 昇腾的开发者,帮助完成昇腾环境下 Accelerate 的安装。 + +Accelerate 下载安装 +-------------------- + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装教程 <./install>` 准备好昇腾环境! + 或者直接使用具备昇腾环境的镜像 `cosdt/cann:8.0.rc1-910b-ubuntu22.04 `_, + 更多的版本可至 `cosdt/cann `_ 获取。 + +启动镜像 +::::::::::::::::: + +.. code-block:: shell + + docker run -itd --network host -v /usr/local/dcmi:/usr/local/dcmi -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi -v /usr/local/Ascend/driver:/usr/local/Ascend/driver -v /etc/ascend_install.info:/etc/ascend_install.info --device /dev/davinci7 --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc --shm-size 16G --name accelerate cosdt/cann:8.0.rc1-910b-ubuntu22.04 bash + +安装 Accelerate 及依赖包 +:::::::::::::::::::::::::: + +.. code-block:: shell + + pip install torch==2.2.0 torch_npu==2.2.0 accelerate -i https://pypi.tuna.tsinghua.edu.cn/simple + diff --git a/sources/accelerate/quick_start.rst b/sources/accelerate/quick_start.rst new file mode 100644 index 0000000..8ec41c0 --- /dev/null +++ b/sources/accelerate/quick_start.rst @@ -0,0 +1,69 @@ +快速开始 +============ + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及 Accelerate ! + +本教程以一个简单的 NLP 模型为例,讲述如何使用 Accelerate 在昇腾 NPU 上进行模型的训练。 + +前置准备 +------------ + +本篇将使用到 HuggingFace 其他工具链及 scikit-learn 库,请使用以下指令安装: + +.. code-block:: + + pip install datasets evaluate transformers scikit-learn -i https://pypi.tuna.tsinghua.edu.cn/simple + +本篇样例代码为 Accelrate 官方样例,需提前进行下载 + +.. code-block:: + + git clone https://github.com/huggingface/accelerate.git + +模型训练 +------------ + +.. code-block:: + :linenos: + + # 替换HF域名,方便国内用户进行数据及模型的下载 + export HF_ENDPOINT=https://hf-mirror.com + # 进入项目目录 + cd accelerate/examples + # 模型训练 + python nlp_example.py + +出现如下日志代表训练成功: + +:: + + Downloading builder script: 5.75kB [00:01, 3.69kB/s] + tokenizer_config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████| 49.0/49.0 [00:00<00:00, 237kB/s] + config.json: 570B [00:00, 2.23MB/s] + vocab.txt: 79.5kB [00:12, 3.45kB/s]Error while downloading from https://hf-mirror.com/bert-base-cased/resolve/main/vocab.txt: HTTPSConnectionPool(host='hf-mirror.com', port=443): Read timed out. + Trying to resume download... 
+ vocab.txt: 213kB [00:07, 15.5kB/s]] + vocab.txt: 91.4kB [00:32, 2.81kB/s] + tokenizer.json: 436kB [00:19, 22.8kB/s] + Downloading readme: 35.3kB [00:01, 26.4kB/s] + Downloading data: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 649k/649k [00:02<00:00, 288kB/s] + Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████| 75.7k/75.7k [00:00<00:00, 77.8kB/s] + Downloading data: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 308k/308k [00:01<00:00, 204kB/s] + Generating train split: 100%|███████████████████████████████████████████████████████████████████████████| 3668/3668 [00:00<00:00, 27701.23 examples/s] + Generating validation split: 100%|████████████████████████████████████████████████████████████████████████| 408/408 [00:00<00:00, 73426.42 examples/s] + Generating test split: 100%|███████████████████████████████████████████████████████████████████████████| 1725/1725 [00:00<00:00, 246370.91 examples/s] + Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 3668/3668 [00:01<00:00, 3378.05 examples/s] + Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 408/408 [00:00<00:00, 3553.72 examples/s] + Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 1725/1725 [00:00<00:00, 5109.03 examples/s] + model.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 436M/436M [02:42<00:00, 2.68MB/s] + Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight'] + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. + epoch 0: {'accuracy': 0.8014705882352942, 'f1': 0.8439306358381503} + epoch 1: {'accuracy': 0.8578431372549019, 'f1': 0.8975265017667845} + epoch 2: {'accuracy': 0.8700980392156863, 'f1': 0.9087779690189329} diff --git a/sources/onnxruntime/index.rst b/sources/onnxruntime/index.rst new file mode 100644 index 0000000..1108f40 --- /dev/null +++ b/sources/onnxruntime/index.rst @@ -0,0 +1,8 @@ +ONNX Runtime +============ + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst diff --git a/sources/onnxruntime/install.rst b/sources/onnxruntime/install.rst new file mode 100644 index 0000000..8837bda --- /dev/null +++ b/sources/onnxruntime/install.rst @@ -0,0 +1,33 @@ +安装指南 +=========== + +本教程面向使用 ONNX Runtime & Ascend NPU 的开发者,帮助完成昇腾环境下 ONNX Runtime 的安装。 + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装教程 <../ascend/quick_install>` 准备好昇腾环境! + +ONNX Runtime 安装 +------------------- + +ONNX Runtime 目前提供了 源码编译 和 二进制包 两种安装方式,其中二进制包当前只支持Python。 + +从源码安装 +^^^^^^^^^^^^ + +.. 
code-block:: shell + :linenos: + + # Default path, change it if needed. + source /usr/local/Ascend/ascend-toolkit/set_env.sh + + ./build.sh --config --build_shared_lib --parallel --use_cann + + +从pip安装 +^^^^^^^^^^^^ + +.. code-block:: shell + :linenos: + + pip3 install onnxruntime-cann diff --git a/sources/onnxruntime/quick_start.rst b/sources/onnxruntime/quick_start.rst new file mode 100644 index 0000000..60cf448 --- /dev/null +++ b/sources/onnxruntime/quick_start.rst @@ -0,0 +1,97 @@ +快速开始 +=========== + +.. note:: + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及 ONNX Runtime! + +本教程以一个简单的 resnet50 模型为例,讲述如何在 Ascend NPU上使用 ONNX Runtime 进行模型推理。 + +环境准备 +----------- + +安装本教程所依赖的额外必要库。 + +.. code-block:: shell + :linenos: + + pip install numpy Pillow onnx + +模型准备 +----------- + +ONNX Runtime 推理需要 ONNX 格式模型作为输入,目前有以下几种主流途径获得 ONNX 模型。 + +1. 从 `ONNX Model Zoo `_ 中下载模型。 +2. 从 torch、TensorFlow 等框架导出 ONNX 模型。 +3. 使用转换工具,完成其他类型到 ONNX 模型的转换。 + +本教程使用的 resnet50 模型是从 ONNX Model Zoo 中直接下载的,具体的 `下载链接 `_ + +类别标签 +----------- + +类别标签用于将输出权重转换成人类可读的类别信息,具体的 `下载链接 `_ + +模型推理 +----------- + +.. code-block:: python + :linenos: + + import onnxruntime as ort + import numpy as np + import onnx + from PIL import Image + + def preprocess(image_path): + img = Image.open(image_path) + img = img.resize((224, 224)) + img = np.array(img).astype(np.float32) + + img = np.transpose(img, (2, 0, 1)) + img = img / 255.0 + mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1) + std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1) + img = (img - mean) / std + img = np.expand_dims(img, axis=0) + return img + + def inference(model_path, img): + options = ort.SessionOptions() + providers = [ + ( + "CANNExecutionProvider", + { + "device_id": 0, + "arena_extend_strategy": "kNextPowerOfTwo", + "npu_mem_limit": 2 * 1024 * 1024 * 1024, + "op_select_impl_mode": "high_performance", + "optypelist_for_implmode": "Gelu", + "enable_cann_graph": True + }, + ), + "CPUExecutionProvider", + ] + + session = ort.InferenceSession(model_path, sess_options=options, providers=providers) + input_name = session.get_inputs()[0].name + output_name = session.get_outputs()[0].name + + result = session.run([output_name], {input_name: img}) + return result + + def display(classes_path, result): + with open(classes_path) as f: + labels = [line.strip() for line in f.readlines()] + + pred_idx = np.argmax(result) + print(f'Predicted class: {labels[pred_idx]} ({result[0][0][pred_idx]:.4f})') + + if __name__ == '__main__': + model_path = '~/model/resnet/resnet50.onnx' + image_path = '~/model/resnet/cat.jpg' + classes_path = '~/model/resnet/imagenet_classes.txt' + + img = preprocess(image_path) + result = inference(model_path, img) + display(classes_path, result) diff --git a/sources/transformers/fine-tune.rst b/sources/transformers/fine-tune.rst new file mode 100644 index 0000000..e520f20 --- /dev/null +++ b/sources/transformers/fine-tune.rst @@ -0,0 +1,250 @@ +微调预训练模型 +================== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及transformers! + +大模型微调本质是利用特定领域的数据集对已预训练的大模型进行进一步训练的过程。它旨在优化模型在特定任务上的性能,使模型能够更好地适应和完成特定领域的任务。 +本文在使用transformers库选定相关数据集和预训练模型的基础上,通过超参数调优完成对模型的微调。 + +前置准备 +----------------- + +安装必要库 +<<<<<<<<<<<<<<< + +.. code-block:: shell + :linenos: + + pip install transformers datasets evaluate accelerate scikit-learn + +加载数据集 +<<<<<<<<<<<<<<<<<<< + +模型训练需要使用数据集,这里使用 `Yelp Reviews dataset `_ : + +.. 
code-block:: python + :linenos: + + from datasets import load_dataset + + # load_dataset 会自动下载数据集并将其保存到本地路径中 + dataset = load_dataset("yelp_review_full") + #输出数据集的第100条数据 + dataset["train"][100] + +输出如下: + +.. code-block:: shell + + {'label': 0, 'text': 'My expectations for McDonalds are t rarely high. But for one to still fail so spectacularly...that takes something special!\\n + The cashier took my friends\'s order, then promptly ignored me. I had to force myself in front of a cashier who opened his register to wait on the + person BEHIND me. I waited over five minutes for a gigantic order that included precisely one kid\'s meal. After watching two people who ordered after + me be handed their food, I asked where mine was. The manager started yelling at the cashiers for \\"serving off their orders\\" when they didn\'t have + their food. But neither cashier was anywhere near those controls, and the manager was the one serving food to customers and clearing the boards.\\nThe + manager was rude when giving me my order. She didn\'t make sure that I had everything ON MY RECEIPT, and never even had the decency to apologize that + I felt I was getting poor service.\\nI\'ve eaten at various McDonalds restaurants for over 30 years. I\'ve worked at more than one location. I expect + bad days, bad moods, and the occasional mistake. But I have yet to have a decent experience at this store. It will remain a place I avoid unless someone + in my party needs to avoid illness from low blood sugar. Perhaps I should go back to the racially biased service of Steak n Shake instead!'} + + +预处理数据集 +<<<<<<<<<<<<<<<<< + +预处理数据集需要使用AutoTokenizer,它用来自动获取与模型匹配的分词器,分词器根据规则将文本拆分为标记,并转换为张量作为模型输入, +下面用到了Meta-Llama-3-8B-Instruct模型,下载模型请转至 `模型获取 <./modeldownload.html>`_,以下是一个示例: + +.. code-block:: python + :linenos: + + from transformers import AutoTokenizer + + tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") + #使用分词器处理文本 + encoded_input = tokenizer("Do not meddle in the affairs of wizards, for they are subtle and quick to anger.") + print(encoded_input) + +输出如下: + +.. code-block:: shell + + {'input_ids': [128000, 5519, 539, 1812, 91485, 304, 279, 22747, 315, 89263, 11, 369, 814, 527, 27545, 323, 4062, 311, 19788, 13], + 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]} + +接着使用dataset.map方法对数据集进行预处理: + +.. code-block:: python + :linenos: + + def tokenize_function(examples): + return tokenizer(examples["text"], padding="max_length", truncation=True) + + tokenized_datasets = dataset.map(tokenize_function, batched=True) + +初次进行预处理需要一定时间,内容如下: + +.. code-block:: shell + :linenos: + + Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding. + Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation. + Map: 100%|████████████████████████████████████████████████████████████████████████| 650000/650000 [03:27<00:00, 3139.47 examples/s] + Map: 100%|██████████████████████████████████████████████████████████████████████████| 50000/50000 [00:15<00:00, 3156.92 examples/s] + +训练全部的数据集会耗费更长的时间,通常将其划分为较小的训练集和验证集,以提高训练速度: + +.. 
code-block:: python
+   :linenos:
+
+    small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
+    small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
+
+    # 下面是加载全训练集和验证集
+    # full_train_dataset = tokenized_datasets["train"]
+    # full_eval_dataset = tokenized_datasets["test"]
+
+训练
+------------
+
+加载模型
+<<<<<<<<<
+
+使用AutoModelForCausalLM可以自动加载模型:
+
+.. code-block:: python
+   :linenos:
+
+    from transformers import AutoModelForCausalLM
+
+    model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+
+超参数调优
+<<<<<<<<<<<<<<<<<<<<<
+
+TrainingArguments类包含所有可调整的超参数,以及用于激活不同训练选项的标志。超参数定义了关于模型的更高层次的概念,例如模型复杂程度或学习能力,下面使用TrainingArguments类来加载训练参数:
+
+.. code-block:: python
+   :linenos:
+
+    from transformers import TrainingArguments
+
+    training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
+
+模型评估
+<<<<<<<<<<<<<
+
+模型评估用于衡量模型在给定数据集上的表现,常用指标包括准确率、完全匹配率、平均交并比等,下面是使用方式:
+
+.. code-block:: python
+   :linenos:
+
+    import numpy as np
+    import sklearn
+    import evaluate
+
+    metric = evaluate.load("accuracy")
+
+    # 计算预测的准确性:先将 logits 转换为预测类别,再传递给 compute
+    def compute_metrics(eval_pred):
+        logits, labels = eval_pred
+        predictions = np.argmax(logits, axis=-1)
+        return metric.compute(predictions=predictions, references=labels)
+
+
+Trainer
+<<<<<<<
+
+使用已加载的模型、训练参数、训练和测试数据集以及评估函数创建一个Trainer对象,并调用trainer.train()来微调模型:
+
+.. code-block:: python
+   :linenos:
+
+    from transformers import Trainer
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=small_train_dataset,
+        eval_dataset=small_eval_dataset,
+        compute_metrics=compute_metrics,
+    )
+
+    trainer.train()
+
+
+微调全流程
+-------------------
+
+.. code-block:: python
+   :linenos:
+
+    import torch
+    import torch_npu
+    import numpy as np
+    import sklearn
+    import evaluate
+    from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
+    from datasets import load_dataset
+
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+    device = "npu:0" if torch.npu.is_available() else "cpu"
+
+    # 加载分词器和模型
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+    ).to(device)
+
+    dataset = load_dataset("yelp_review_full")
+
+    #分词函数
+    def tokenize_function(examples):
+        return tokenizer(examples["text"], padding="max_length", truncation=True)
+
+    tokenized_datasets = dataset.map(tokenize_function, batched=True)
+
+    small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
+    small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
+
+    # 加载评估指标
+    metric = evaluate.load("accuracy")
+
+    # 定义评估指标的计算函数
+    def compute_metrics(eval_pred):
+        logits, labels = eval_pred
+        predictions = np.argmax(logits, axis=-1)
+        return metric.compute(predictions=predictions, references=labels)
+
+    training_args = TrainingArguments(output_dir="test_trainer", eval_strategy="epoch")
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=small_train_dataset,
+        eval_dataset=small_eval_dataset,
+        compute_metrics=compute_metrics,
+    )
+
+    trainer.train()
+
+
+训练完成后得到以下结果:
+
+.. 
code-block:: shell + :linenos: + + |█████████████████████████████████| [375/375 06:21, Epoch 3/3] + + ===== ============= =============== ====== + Epoch Training Loss Validation Loss Accuracy + ===== ============= =============== ====== + 1 No log 1.155628 0.499000 + 2 No log 0.994618 0.574000 + 3 No log 1.026123 0.590000 + ===== ============= =============== ====== + + TrainOutput(global_step=375, training_loss=1.0557311197916666, metrics={'train_runtime': 384.55, 'train_samples_per_second': 7.801, + 'train_steps_per_second': 0.975, 'total_flos': 789354427392000.0, 'train_loss': 1.0557311197916666, 'epoch': 3.0}) diff --git a/sources/transformers/images/downloadmodel.png b/sources/transformers/images/downloadmodel.png new file mode 100644 index 0000000..e2e119f Binary files /dev/null and b/sources/transformers/images/downloadmodel.png differ diff --git a/sources/transformers/index.rst b/sources/transformers/index.rst new file mode 100644 index 0000000..534ccd4 --- /dev/null +++ b/sources/transformers/index.rst @@ -0,0 +1,11 @@ +Transformers +================== + +.. toctree:: + :maxdepth: 2 + + install.rst + quick_start.rst + modeldownload.rst + fine-tune.rst + inference.rst \ No newline at end of file diff --git a/sources/transformers/inference.rst b/sources/transformers/inference.rst new file mode 100644 index 0000000..e66aca1 --- /dev/null +++ b/sources/transformers/inference.rst @@ -0,0 +1,183 @@ +推理 +================== + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及transformers! + +在推理阶段,训练好的模型被用于对图像、语音或文本进行分类,也可以用于语言生成、翻译等。 + +本文的模型推理以transformers的pipeline为中心进行介绍,pipelines可以自动加载模型和能够进行任务推理的预处理类,使任何模型进行任何语言、计算机视觉、语音以及多模态任务的推理变得非常简单。 + +pipeline 抽象类 +------------------ + +pipeline 抽象类是所有其他 pipeline 的封装,可以像其他任何 pipeline 一样实例化。 + +pipeline 参数由 task、tokenizer、model、optional 组成: + +- task 将确定返回哪一个 pipeline,比如 text-classification 将会返回 TextClassificationPipeline,image-to-image 将会返回 ImageToImagePipeline。 + +- tokenizer分词器是用来将输入进行编码,str或者PreTrainedTokenizer,如果未提供将使用model参数,如果model也未提供或者非str,将使用config参数,如果config参数也未提供或者非str,将提供task的默认tokenizer。 + +- model是模型,str或者PreTrainedModel,一般为有.bin模型文件的目录。 + +- optional其他参数包括,config、feature_extractor、device、device_map等。 + + +pipeline 使用 +---------------------- + +pipeline适用于音频、计算机视觉、自然语言处理和多模态任务,下面将介绍它在各场景的使用方式。 + +音频 +<<<<<<<<<<<<< + +音频识别 +>>>>>>>>>>>> + +用于提取某些音频中包含的文本,如下创建pipeline,并输入音频文件: + +.. code-block:: python + :linenos: + + from transformers import pipeline + + transcriber = pipeline(task="automatic-speech-recognition") + transcriber("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac") + + #以下为输出示例 + {'text': 'I HAVE A DREAM BUT ONE DAY THIS NATION WILL RISE UP LIVE UP THE TRUE MEANING OF ITS TREES'} + +文本转音频 +>>>>>>>>>>> + +根据输入文本和可选的其他条件输入生成音频文件: + +.. code-block:: python + :linenos: + + from transformers import pipeline + + pipe = pipeline(model="suno/bark-small") + output = pipe("Hey it's HuggingFace on the phone!") + + audio = output["audio"] + sampling_rate = output["sampling_rate"] + +计算机视觉 +<<<<<<<<<<<<<<<<< + +图像分类 +>>>>>>>>>>>>>> + +图像分类可以识别图片特征,并给出分类标签和置信度得分: + +.. 
code-block:: python + :linenos: + + from transformers import pipeline + + classifier = pipeline(model="microsoft/beit-base-patch16-224-pt22k-ft22k") + classifier("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png") + + #以下为输出示例 + [{'score': 0.442, 'label': 'macaw'}, {'score': 0.088, 'label': 'popinjay'}, {'score': 0.075, 'label': 'parrot'}, {'score': 0.073, 'label': 'parodist, lampooner'}, {'score': 0.046, 'label': 'poll, poll_parrot'}] + +图像转图像 +>>>>>>>>>>>>> + +它可以将图像根据信息生成新图像,以下示例通过图像超分辨率模型将低分辨率图像放大并增强其细节,使其看起来更清晰: + +.. code-block:: python + :linenos: + + from PIL import Image + import requests + from transformers import pipeline + + upscaler = pipeline("image-to-image", model="caidas/swin2SR-classical-sr-x2-64") + img = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw) + img = img.resize((64, 64)) + upscaled_img = upscaler(img) #超分辨率处理 + print(img.size) + print(upscaled_img.size) + + #以下为输出示例 + (64, 64) # 输出原图像的尺寸 + (144, 144) # 输出处理后图像的尺寸 + +自然语言处理 +<<<<<<<<<<<<<<<<< + +文本分类 +>>>>>>>>>>>>>>>>>>> + +根据标签对文本进行分类: + +.. code-block:: shell + :linenos: + + from transformers import pipeline + classifier = pipeline(model="meta-llama/Meta-Llama-3-8B-Instruct") + classifier( + "I have a problem with my iphone that needs to be resolved asap!!", + candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"], + ) + #以下为输出示例 + #{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]} + +文本生成 +>>>>>>>>>>>>>>>>> + +根据文本生成对话响应: + +.. code-block:: shell + :linenos: + + from transformers import pipeline + + generator = pipeline(model="HuggingFaceH4/zephyr-7b-beta") + # Zephyr-beta is a conversational model, so let's pass it a chat instead of a single string + generator([{"role": "user", "content": "What is the capital of France? Answer in one word."}], do_sample=False, max_new_tokens=2) + + #以下为输出示例 + [{'generated_text': [{'role': 'user', 'content': 'What is the capital of France? Answer in one word.'}, {'role': 'assistant', 'content': 'Paris'}]}] + +多模态 +<<<<<<<<<<<<<< + +视觉问答 +>>>>>>>>>>>>> + +VQA使用图像和关于该图像的问题进行提问,图像可以是URL或图像的本地路径: + +.. code-block:: shell + :linenos: + + from transformers import pipeline + vqa = pipeline(model="meta-llama/Meta-Llama-3-8B-Instruct") + output = vqa( + image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", + question="What is the invoice number?", + ) + output[0]["score"] = round(output[0]["score"], 3) + + #以下为输出示例 + #[{'score': 0.425, 'answer': 'us-001', 'start': 16, 'end': 16}] + +图像转文本 +>>>>>>>>>>>>>>>>>>>> + +用于预测给定图像的主题: + +.. code-block:: shell + :linenos: + + from transformers import pipeline + + captioner = pipeline(model="ydshieh/vit-gpt2-coco-en") + captioner("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png") + + #以下为输出示例 + [{'generated_text': 'two birds are standing next to each other '}] diff --git a/sources/transformers/install.rst b/sources/transformers/install.rst new file mode 100644 index 0000000..023c183 --- /dev/null +++ b/sources/transformers/install.rst @@ -0,0 +1,90 @@ +安装指南 +=========== + +本文将介绍如何在昇腾环境下使用transfomers,帮助开发者完成transformers的安装。 + +.. note:: + + 请确保环境安装了对应的固件和驱动,详情请参考 `快速安装昇腾环境 <../ascend/quick_install.html>`_。 + +创建虚拟环境 +-------------------- + +首先需要安装并激活python环境: + +.. 
code-block:: shell + + conda create -n your_env_name python=3.10 + conda activate your_env_name + +同时安装依赖库: + +.. code-block:: shell + + # install torch + pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple torch==2.2.0 + + # install torch-npu + pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple torch-npu==2.2.0 + +安装transformers +---------------------- + +直接使用pip命令进行安装: + +.. code-block:: shell + + pip install -i https://pypi.tuna.tsinghua.edu.cn/simple transformers + +验证安装 +-------------------- + +.. code-block:: python + + from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline + import torch + import torch_npu + + # 检查 NPU 是否可用 + if torch.npu.is_available(): + device = torch.device("npu:0") + print("NPU is available. Using NPU.") + else: + device = torch.device("cpu") + print("NPU is not available. Using CPU.") + + model_id = "bert-base-uncased" + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForSequenceClassification.from_pretrained(model_id) + + model.to(device) + + nlp_pipeline = pipeline( + "sentiment-analysis", + model=model, + tokenizer=tokenizer, + device=0 if torch.npu.is_available() else -1 + ) + + #分析句子情感并输出 + result = nlp_pipeline("This is a test sentence.") + print(result) + + +如果成功运行并输出下面内容,则安装成功: + +.. code-block:: shell + + NPU is available. Using NPU. + Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight'] + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + [{'label': 'POSITIVE', 'score': 0.9998704791069031}] + +卸载transformers +--------------------- + +.. code-block:: shell + + pip uninstall transformers + + diff --git a/sources/transformers/modeldownload.rst b/sources/transformers/modeldownload.rst new file mode 100644 index 0000000..a5c4bb3 --- /dev/null +++ b/sources/transformers/modeldownload.rst @@ -0,0 +1,126 @@ +模型获取 +============== + +本文以Meta-Llama-3-8B-Instruct模型为例,介绍如何进行模型的获取, +该模型获取目前主要有三种方式,Meta官方_,HuggingFace_,hf-mirror_, 下面将详细说明这三种获取模型的方法。 + +Meta官方 +----------------- + +下载模型前需要获取licence,前往 `Meta官网 `_,提供信息获取到许可证,拿到已签名的URL。 + +- 链接类似于下面: + +.. code-block:: shell + :linenos: + + https://download6.llamameta.net/*?Policy=eyJTdGF0ZW1lbnQiOlt7InVuaXF1ZV9oYXNoIjoibGJuYXc0bzdrY2pqNnoxeXZ1N3hmcmNvIiwiUmVzb3VyY2UiOiJodHRwczp + cL1wvZG93bmxvYWQ2LmxsYW1hbWV0YS5uZXRcLyoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE3MTY0MzYyMTF9fX1dfQ__&Signature=KTyc + LZkPxqMYY0XqW047tNN9IWX%7EOxlQbqCsDqmcX0vE8oia3Qej-x6aGFQSJhkHRULu8Efso5Qde8KRiptK5rGh9oLrtMeAS3SID%7EOyk38o9NNLKxWokA7yQxwvUVRqibVMJyhkE8XE + K2HDNftKT9KLaDG8HHFQmGWuhdTJSvCezJIRKWPtzRf0dohepOiOHOcQW%7Ermo7m6iI595PuoX7o3bVYpFYQf1Syrp05XCr9t2-Rzf8xaIYF5-2vFqELFyFyJys%7E5lA4178elcJcU + ImSSokn1IJBARAZ0iLaWDFsuTbvDJmz9j-ccHFJzgDPCMLQjHpK6QfCk4TWGmdyXMg__&Key-Pair-Id=K15QRJLYKIFSLZ&Download-Request-ID=1502880093958574 + +- 之后获取源码,使用以下命令下载并进入到工作目录: + +.. code-block:: shell + :linenos: + + git clone https://github.com/meta-llama/llama3.git + cd llama3 + +- 运行脚本: + +.. code-block:: python + :linenos: + + ./download.sh + +运行时输入上边获取到的URL,即可进行模型的下载。 + + +HuggingFace +-------------------- +HuggingFace同样需要获得licence,访问仓库 `meta-llama/Meta-Llama-3-8B-Instruct `_ ,接受许可后等待请求获得批准即可。 + +得到权限后,点击"文件和版本"标签,下载原始文件夹的内容或通过以下命令行下载: + +- 安装huggingface-hub: + +.. code-block:: shell + + pip install huggingface-hub + +- 下载文件: + +.. 
code-block:: shell + + huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --include “original/*” --local-dir meta-llama/Meta-Llama-3-8B-Instruct + +以上两种方法国内用户可能无法完成,下面推荐 **国内用户** 的获取模型的方式。 + +hf-mirror +------------------- + +hf-mirror是更适合国内用户获取模型的方式,它是HuggingFace平台的镜像网站, 提供了一个备用的域名来访问HuggingFace的资源和功能, +以 `Qwen2-7B-Instruct `_ 为例(Meta-Llama-3-8B-Instruct同样需要获取license,不方便国内用户, 这里用Qwen2代替说明), 共有三种方法,下面依次进行介绍。 + +直接下载 +<<<<<<<<<<<<<<< + +点击模型的下的 **↓** 图标下载文件,如下: + +.. figure:: ./images/image.png + :align: center + +修改镜像源 +<<<<<<<<<<<<<<<<<< + +- 修改环境变量HF_ENDPOINT,该变量会替换huggingface.co域名: + +.. code-block:: shell + :linenos: + + # 临时生效 + export HF_ENDPOINT=https://hf-mirror.com + # 永久生效 + echo export HF_ENDPOINT=https://hf-mirror.com >> ~/.bashrc + + +- 安装huggingface-hub: + +.. code-block:: shell + + pip install huggingface-hub + + +- 下载文件: + +.. code-block:: python + :linenos: + + # huggingface_hub下载单个文件 + from huggingface_hub import hf_hub_download + hf_hub_download(repo_id="Qwen/Qwen2-7B-Instruct", filename="config.json", cache_dir="./your/path/Qwen") + + # huggingface_hub下载整个项目 + from huggingface_hub import snapshot_download + snapshot_download(repo_id="Qwen/Qwen2-7B-Instruct", cache_dir="./your/path/Qwen") + +git lfs +<<<<<<<<<<<<<<<<<<< + +使用以下命令下载模型: + +.. code-block:: shell + :linenos: + + # Make sure you have git-lfs installed (https://git-lfs.com) + git lfs install + + git clone https://hf-mirror.com/Qwen/Qwen2-7B-Instruct + + # If you want to clone without large files - just their pointers + # GIT_LFS_SKIP_SMUDGE=1 git clone https://hf-mirror.com/Qwen/Qwen2-7B-Instruct + + +使用以上任意一种方式即可完成模型的获取,将模型保存在本地路径后可以进行 `微调预训练模型 <./fine-tune.html>`_ 和 `推理 <./inference.html>`_ 等操作。 \ No newline at end of file diff --git a/sources/transformers/quick_start.rst b/sources/transformers/quick_start.rst new file mode 100644 index 0000000..a1702ff --- /dev/null +++ b/sources/transformers/quick_start.rst @@ -0,0 +1,131 @@ +快速开始 +============ + +.. note:: + + 阅读本篇前,请确保已按照 :doc:`安装指南 <./install>` 准备好昇腾环境及transformers! + + +本文以Meta-Llama-3-8B-Instruct模型为例,介绍如何通过transformers使用模型进行推理, +针对模型推理transformers提供了 AutoModelForCausalLM_,pipeline_ 两种方式,下面将说明这两种接口的使用方式。 + +.. note:: + +以下模型用到了Meta-Llama-3-8B-Instruct, 具体可以参考 `模型获取 <./modeldownload.html>`_。 + +AutoModelForCausalLM +----------------------------------------------- + +.. code-block:: python + :linenos: + + import torch + import torch_npu + from transformers import AutoModelForCausalLM, AutoTokenizer + + model_id = "meta-llama/Meta-Llama-3-8B-Instruct" + device = "npu:0" if torch.npu.is_available() else "cpu" + + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.bfloat16, + device_map="auto", + ).to(device) + + +pipeline +------------------------- + +.. code-block:: python + :linenos: + + import transformers + import torch + import torch_npu + + model_id = "meta-llama/Meta-Llama-3-8B-Instruct" + device = "npu:0" if torch.npu.is_available() else "cpu" + + pipeline = transformers.pipeline( + "text-generation", + model=model_id, + model_kwargs={"torch_dtype": torch.bfloat16}, + device=device, + ) + + +全流程 +---------- + +.. 
code-block:: python + :linenos: + + from transformers import AutoModelForCausalLM, AutoTokenizer + import torch + import torch_npu + + #如果提前下载好模型将meta-llama/Meta-Llama-3-8B-Instruct更换为本地地址 + model_id = "meta-llama/Meta-Llama-3-8B-Instruct" + device = "npu:0" if torch.npu.is_available() else "cpu" # 指定使用的设备为 NPU 0 + + # 加载预训练的分词器 + tokenizer = AutoTokenizer.from_pretrained(model_id) + + # 加载预训练的语言模型, 并指定数据类型为bfloat16, 自动选择设备映射 + model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.bfloat16, + device_map="auto", + ).to(device) # 将模型移动到指定的设备 + + # 定义消息列表,包含系统消息和用户消息 + messages = [ + {"role": "system", "content": "You are a housekeeper chatbot who always responds in polite expression!"}, + {"role": "user", "content": "Who are you? what should you do?"}, + ] + + # 使用分词器将消息列表应用到聊天模板中,并转换为张量 + input_ids = tokenizer.apply_chat_template( + messages, + add_generation_prompt=True, + return_tensors="pt" # 返回 PyTorch 张量 + ).to(model.device) + + + # 定义终止标记,包括模型的结束标记 ID 和一个空标记 ID + terminators = [ + tokenizer.eos_token_id, + tokenizer.convert_tokens_to_ids("<|eot_id|>") + ] + + # 生成响应 + outputs = model.generate( + input_ids, + max_new_tokens=256, # 设置生成的最大token + eos_token_id=terminators, + do_sample=True, + temperature=0.6, # 设置采样温度,影响生成的多样性 + top_p=0.9, + ) + + # 获取生成的响应,排除输入的部分 + response = outputs[0][input_ids.shape[-1]:] + print(tokenizer.decode(response, skip_special_tokens=True)) + +输出示例: + +.. code-block:: shell + :linenos: + + Good day to you! My name is Housekeeper Helen, and I'm delighted to introduce myself as a friendly and efficient chatbot designed to assist with household tasks and provide helpful information. + As a housekeeper, my primary role is to ensure your home is tidy, organized, and comfortable. I'd be happy to help with: + + * Cleaning and organization tips + * Household chore schedules + * Laundry and ironing guidance + * Home maintenance advice + * And any other domestic-related queries you may have! + + Please feel free to ask me any questions or request my assistance with a specific task. I'm here to help make your life easier and your home sparkle! +
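+
+pipeline 调用示例
+--------------------
+
+上文的 pipeline 小节只演示了 pipeline 的创建,下面补充一个调用该 pipeline 完成对话生成的简单示意(假设 pipeline 已按上文方式创建,消息内容与生成参数沿用全流程小节中的设置,可按需调整):
+
+.. code-block:: python
+   :linenos:
+
+    # 假设 pipeline 已按上文 pipeline 小节创建完成
+    messages = [
+        {"role": "system", "content": "You are a housekeeper chatbot who always responds in polite expression!"},
+        {"role": "user", "content": "Who are you? what should you do?"},
+    ]
+
+    # 直接将对话消息传入 pipeline,其内部会自动应用聊天模板并生成回复
+    outputs = pipeline(
+        messages,
+        max_new_tokens=256,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+
+    # 生成结果中最后一条消息即为模型的回复
+    print(outputs[0]["generated_text"][-1]["content"])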