Skip to content

Commit

Permalink
[Doc] quick start swap tabs (#1263)
Browse files Browse the repository at this point in the history
* [doc] quick start swap tabs

* update docs

* update

* update

* update

* update

* update

* update

* update
  • Loading branch information
Leymore authored Jul 5, 2024
1 parent 68ca484 commit 1d3a26c
Show file tree
Hide file tree
Showing 23 changed files with 515 additions and 191 deletions.
8 changes: 8 additions & 0 deletions configs/datasets/demo/demo_cmmlu_base_ppl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Demo subset of the CMMLU base (PPL) datasets.

Prefixes every dataset abbreviation with ``demo_`` and restricts each
subset to its first four test samples so the demo finishes quickly.
"""
from mmengine import read_base

with read_base():
    from ..cmmlu.cmmlu_ppl_041cbf import cmmlu_datasets

for dataset_cfg in cmmlu_datasets:
    # Keep only 4 samples per subset for a fast demo run.
    dataset_cfg['reader_cfg']['test_range'] = '[0:4]'
    # Mark the trimmed configs so results are not confused with full runs.
    dataset_cfg['abbr'] = 'demo_' + dataset_cfg['abbr']
8 changes: 8 additions & 0 deletions configs/datasets/demo/demo_cmmlu_chat_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Demo subset of the CMMLU chat (gen) datasets.

Prefixes every dataset abbreviation with ``demo_`` and restricts each
subset to its first four test samples so the demo finishes quickly.
"""
from mmengine import read_base

with read_base():
    from ..cmmlu.cmmlu_gen_c13365 import cmmlu_datasets

for dataset_cfg in cmmlu_datasets:
    # Keep only 4 samples per subset for a fast demo run.
    dataset_cfg['reader_cfg']['test_range'] = '[0:4]'
    # Mark the trimmed configs so results are not confused with full runs.
    dataset_cfg['abbr'] = 'demo_' + dataset_cfg['abbr']
7 changes: 7 additions & 0 deletions configs/datasets/demo/demo_gsm8k_base_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Demo slice of the GSM8K base (gen) dataset.

Renames the single GSM8K config with a ``demo_`` prefix and limits
evaluation to the first 64 test samples.
"""
from mmengine import read_base

with read_base():
    from ..gsm8k.gsm8k_gen_17d0dc import gsm8k_datasets

# Trim the single GSM8K config to a fast 64-sample demo slice.
_demo_cfg = gsm8k_datasets[0]
_demo_cfg['abbr'] = 'demo_' + _demo_cfg['abbr']
_demo_cfg['reader_cfg']['test_range'] = '[0:64]'
7 changes: 7 additions & 0 deletions configs/datasets/demo/demo_gsm8k_chat_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Demo slice of the GSM8K chat (gen) dataset.

Renames the single GSM8K config with a ``demo_`` prefix and limits
evaluation to the first 64 test samples.
"""
from mmengine import read_base

with read_base():
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets

# Trim the single GSM8K config to a fast 64-sample demo slice.
_demo_cfg = gsm8k_datasets[0]
_demo_cfg['abbr'] = 'demo_' + _demo_cfg['abbr']
_demo_cfg['reader_cfg']['test_range'] = '[0:64]'
7 changes: 7 additions & 0 deletions configs/datasets/demo/demo_math_base_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Demo slice of the MATH base (4-shot gen) dataset.

Renames the single MATH config with a ``demo_`` prefix and limits
evaluation to the first 64 test samples.
"""
from mmengine import read_base

with read_base():
    from ..math.math_4shot_base_gen_db136b import math_datasets

# Trim the single MATH config to a fast 64-sample demo slice.
_demo_cfg = math_datasets[0]
_demo_cfg['abbr'] = 'demo_' + _demo_cfg['abbr']
_demo_cfg['reader_cfg']['test_range'] = '[0:64]'
7 changes: 7 additions & 0 deletions configs/datasets/demo/demo_math_chat_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Demo slice of the MATH chat (0-shot gen) dataset.

Renames the single MATH config with a ``demo_`` prefix and limits
evaluation to the first 64 test samples.
"""
from mmengine import read_base

with read_base():
    from ..math.math_0shot_gen_393424 import math_datasets

# Trim the single MATH config to a fast 64-sample demo slice.
_demo_cfg = math_datasets[0]
_demo_cfg['abbr'] = 'demo_' + _demo_cfg['abbr']
_demo_cfg['reader_cfg']['test_range'] = '[0:64]'
10 changes: 10 additions & 0 deletions configs/eval_base_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Demo evaluation config: two base models on the GSM8K + MATH demo slices.

Run with ``python run.py configs/eval_base_demo.py``.
"""
from mmengine.config import read_base

with read_base():
    # Demo datasets (small test_range slices of GSM8K and MATH).
    from .datasets.demo.demo_gsm8k_base_gen import gsm8k_datasets
    from .datasets.demo.demo_math_base_gen import math_datasets
    # Base (non-chat) models under evaluation.
    from .models.hf_internlm.hf_internlm2_1_8b import models as hf_internlm2_1_8b_models
    from .models.qwen.hf_qwen2_1_5b import models as hf_qwen2_1_5b_models

# `datasets` and `models` are the entry fields run.py expects.
datasets = [*gsm8k_datasets, *math_datasets]
models = [*hf_qwen2_1_5b_models, *hf_internlm2_1_8b_models]
10 changes: 10 additions & 0 deletions configs/eval_chat_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Demo evaluation config: two chat models on the GSM8K + MATH demo slices.

Run with ``python run.py configs/eval_chat_demo.py``.
"""
from mmengine.config import read_base

with read_base():
    # Demo datasets (small test_range slices of GSM8K and MATH).
    from .datasets.demo.demo_gsm8k_chat_gen import gsm8k_datasets
    from .datasets.demo.demo_math_chat_gen import math_datasets
    # Chat (instruction-tuned) models under evaluation.
    from .models.hf_internlm.hf_internlm2_chat_1_8b import models as hf_internlm2_chat_1_8b_models
    from .models.qwen.hf_qwen2_1_5b_instruct import models as hf_qwen2_1_5b_instruct_models

# `datasets` and `models` are the entry fields run.py expects.
datasets = [*gsm8k_datasets, *math_datasets]
models = [*hf_qwen2_1_5b_instruct_models, *hf_internlm2_chat_1_8b_models]
10 changes: 0 additions & 10 deletions configs/eval_demo.py

This file was deleted.

1 change: 0 additions & 1 deletion configs/models/hf_internlm/hf_internlm2_5_7b_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
max_out_len=1024,
batch_size=8,
run_cfg=dict(num_gpus=1),
stop_words=['</s>', '<|im_end|>'],
)
]
1 change: 0 additions & 1 deletion configs/models/hf_internlm/hf_internlm2_chat_1_8b.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
max_out_len=1024,
batch_size=8,
run_cfg=dict(num_gpus=1),
stop_words=['</s>', '<|im_end|>'],
)
]
1 change: 0 additions & 1 deletion configs/models/hf_internlm/hf_internlm2_chat_1_8b_sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
max_out_len=1024,
batch_size=8,
run_cfg=dict(num_gpus=1),
stop_words=['</s>', '<|im_end|>'],
)
]
1 change: 0 additions & 1 deletion configs/models/hf_internlm/hf_internlm2_chat_20b.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
max_out_len=1024,
batch_size=8,
run_cfg=dict(num_gpus=2),
stop_words=['</s>', '<|im_end|>'],
)
]
1 change: 0 additions & 1 deletion configs/models/hf_internlm/hf_internlm2_chat_20b_sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
max_out_len=1024,
batch_size=8,
run_cfg=dict(num_gpus=2),
stop_words=['</s>', '<|im_end|>'],
)
]
1 change: 0 additions & 1 deletion configs/models/hf_internlm/hf_internlm2_chat_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
max_out_len=1024,
batch_size=8,
run_cfg=dict(num_gpus=1),
stop_words=['</s>', '<|im_end|>'],
)
]
1 change: 0 additions & 1 deletion configs/models/hf_internlm/hf_internlm2_chat_7b_sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
max_out_len=1024,
batch_size=8,
run_cfg=dict(num_gpus=1),
stop_words=['</s>', '<|im_end|>'],
)
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

_meta_template = dict(
round=[
dict(role='HUMAN', begin='[UNUSED_TOKEN_146]user\n', end='[UNUSED_TOKEN_145]\n'),
dict(role='SYSTEM', begin='[UNUSED_TOKEN_146]system\n', end='[UNUSED_TOKEN_145]\n'),
dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True),
dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role='SYSTEM', begin='<|im_start|>system\n', end='<|im_end|>\n'),
dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True),
],
)

Expand All @@ -30,6 +30,6 @@
batch_size=8,
meta_template=_meta_template,
run_cfg=dict(num_gpus=2, num_procs=1),
end_str='[UNUSED_TOKEN_145]',
end_str='<|im_end|>',
)
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

_meta_template = dict(
round=[
dict(role='HUMAN', begin='[UNUSED_TOKEN_146]user\n', end='[UNUSED_TOKEN_145]\n'),
dict(role='SYSTEM', begin='[UNUSED_TOKEN_146]system\n', end='[UNUSED_TOKEN_145]\n'),
dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True),
dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'),
dict(role='SYSTEM', begin='<|im_start|>system\n', end='<|im_end|>\n'),
dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True),
],
)

Expand All @@ -30,6 +30,6 @@
batch_size=8,
meta_template=_meta_template,
run_cfg=dict(num_gpus=1, num_procs=1),
end_str='[UNUSED_TOKEN_145]',
end_str='<|im_end|>',
)
]
74 changes: 36 additions & 38 deletions docs/en/get_started/quick_start.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,43 @@ For larger parameterized models like Llama-7B, refer to other examples provided
In OpenCompass, each evaluation task consists of the model to be evaluated and the dataset. The entry point for evaluation is `run.py`. Users can select the model and dataset to be tested either via command line or configuration files.

`````{tabs}
````{tab} Command Line (Custom HF Model)
For HuggingFace models, users can set model parameters directly through the command line without additional configuration files. For instance, for the `facebook/opt-125m` model, you can evaluate it with the following command:
```bash
python run.py --datasets siqa_gen winograd_ppl \
--hf-type base \
--hf-path facebook/opt-125m
```
Note that with this approach, OpenCompass evaluates only one model at a time, whereas the other approaches can evaluate multiple models at once.
```{caution}
`--hf-num-gpus` does not stand for the actual number of GPUs to use in evaluation, but the minimum required number of GPUs for this model. [More](faq.md#how-does-opencompass-allocate-gpus)
```
:::{dropdown} More detailed example
:animate: fade-in-slide-down
```bash
python run.py --datasets siqa_gen winograd_ppl \
--hf-type base \ # HuggingFace model type, base or chat
--hf-path facebook/opt-125m \ # HuggingFace model path
--tokenizer-path facebook/opt-125m \ # HuggingFace tokenizer path (if the same as the model path, can be omitted)
--tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ # Arguments to construct the tokenizer
--model-kwargs device_map='auto' \ # Arguments to construct the model
--max-seq-len 2048 \ # Maximum sequence length the model can accept
--max-out-len 100 \ # Maximum number of tokens to generate
--min-out-len 100 \ # Minimum number of tokens to generate
--batch-size 64 \ # Batch size
--hf-num-gpus 1 # Number of GPUs required to run the model
```
```{seealso}
For all HuggingFace related parameters supported by `run.py`, please read [Launching Evaluation Task](../user_guides/experimentation.md#launching-an-evaluation-task).
```
:::
````
````{tab} Command Line
Users can combine the models and datasets they want to test using `--models` and `--datasets`.
Expand Down Expand Up @@ -74,44 +110,6 @@ If you want to evaluate other models, please check out the "Command Line (Custom
````
````{tab} Command Line (Custom HF Model)
For HuggingFace models, users can set model parameters directly through the command line without additional configuration files. For instance, for the `facebook/opt-125m` model, you can evaluate it with the following command:
```bash
python run.py --datasets siqa_gen winograd_ppl \
--hf-type base \
--hf-path facebook/opt-125m
```
Note that with this approach, OpenCompass evaluates only one model at a time, whereas the other approaches can evaluate multiple models at once.
```{caution}
`--hf-num-gpus` does not stand for the actual number of GPUs to use in evaluation, but the minimum required number of GPUs for this model. [More](faq.md#how-does-opencompass-allocate-gpus)
```
:::{dropdown} More detailed example
:animate: fade-in-slide-down
```bash
python run.py --datasets siqa_gen winograd_ppl \
--hf-type base \ # HuggingFace model type, base or chat
--hf-path facebook/opt-125m \ # HuggingFace model path
--tokenizer-path facebook/opt-125m \ # HuggingFace tokenizer path (if the same as the model path, can be omitted)
--tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ # Arguments to construct the tokenizer
--model-kwargs device_map='auto' \ # Arguments to construct the model
--max-seq-len 2048 \ # Maximum sequence length the model can accept
--max-out-len 100 \ # Maximum number of tokens to generate
--min-out-len 100 \ # Minimum number of tokens to generate
--batch-size 64 \ # Batch size
--hf-num-gpus 1 # Number of GPUs required to run the model
```
```{seealso}
For all HuggingFace related parameters supported by `run.py`, please read [Launching Evaluation Task](../user_guides/experimentation.md#launching-an-evaluation-task).
```
:::
````
````{tab} Configuration File
In addition to configuring the experiment through the command line, OpenCompass also allows users to write the full configuration of the experiment in a configuration file and run it directly through `run.py`. The configuration file is organized in Python format and must include the `datasets` and `models` fields.
Expand Down
Loading

0 comments on commit 1d3a26c

Please sign in to comment.