feat: support qwen2.5 tools_call
gaozixiang committed Nov 12, 2024
1 parent 47b0d1a commit 2b4f89d
Showing 5 changed files with 123 additions and 5 deletions.
2 changes: 1 addition & 1 deletion docs/en/llm/api_server_tools.md
@@ -1,6 +1,6 @@
# Tools Calling

- LMDeploy supports tools for InternLM2, InternLM2.5 and llama3.1 models.
+ LMDeploy supports tools for InternLM2, InternLM2.5, llama3.1 and Qwen2.5 models.

## Single Round Invocation

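For context, a minimal sketch of the single-round invocation this doc describes, written in the OpenAI-client style the rest of the page uses; the server address, model checkpoint, tool schema, and prompt are placeholder assumptions, not part of this commit:

```python
from openai import OpenAI

# Placeholder address; an api_server serving a Qwen2.5 checkpoint is assumed.
client = OpenAI(base_url='http://0.0.0.0:23333/v1', api_key='dummy')
model_name = client.models.list().data[0].id  # first served model
tools = [{
    'type': 'function',
    'function': {
        'name': 'get_current_weather',
        'description': 'Get the current weather for a city.',
        'parameters': {
            'type': 'object',
            'properties': {
                'city': {'type': 'string', 'description': 'The city name.'},
            },
            'required': ['city'],
        },
    },
}]
response = client.chat.completions.create(
    model=model_name,
    messages=[{'role': 'user', 'content': "What's the weather in Shanghai?"}],
    tools=tools,
)
print(response.choices[0].message.tool_calls)
```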
2 changes: 1 addition & 1 deletion docs/zh_cn/llm/api_server_tools.md
@@ -1,6 +1,6 @@
# Tools

- LMDeploy supports tool calling for the InternLM2, InternLM2.5 and Llama3.1 models.
+ LMDeploy supports tool calling for the InternLM2, InternLM2.5, Llama3.1 and Qwen2.5 models.

## Single Round Invocation

110 changes: 109 additions & 1 deletion lmdeploy/model.py
@@ -892,6 +892,113 @@ def match(cls, model_path: str) -> Optional[str]:
return 'llama3_1'


@MODELS.register_module(name='qwen2d5')
class Qwen2d5Chat(BaseChatTemplate):
    """Chat template for the Qwen2.5-Instruct models."""

    def __init__(
            self,
            system='<|im_start|>system\n',
            meta_instruction='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.',
            eosys='<|im_end|>\n',
            user='<|im_start|>user\n',
            eoh='<|im_end|>\n',
            assistant='<|im_start|>assistant\n',
            eoa='<|im_end|>',
            separator='\n',
            tools='\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>',
            eotools='\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call>',
            stop_words=['<|im_end|>'],
            **kwargs):

        self.tools = tools
        self.eotools = eotools
        super().__init__(system=system,
                         meta_instruction=meta_instruction,
                         eosys=eosys,
                         user=user,
                         eoh=eoh,
                         assistant=assistant,
                         eoa=eoa,
                         separator=separator,
                         stop_words=stop_words,
                         **kwargs)

    def messages2prompt(self,
                        messages,
                        sequence_start=True,
                        tools=None,
                        **kwargs):
        """Return the prompt that is concatenated with other elements in the
        chat template.

        Args:
            messages (str | List): user's input prompt
        Returns:
            str: the concatenated prompt
        """
        if isinstance(messages, str):
            return self.get_prompt(messages, sequence_start)
        box_map = dict(user=self.user,
                       assistant=self.assistant,
                       system=self.system)
        ret = ''
        tool_prompt = ''
        if tools is not None and len(tools) > 0:
            for tool in tools:
                # one JSON object per line inside the <tools> block
                tool_prompt += self.separator + json.dumps(
                    tool, ensure_ascii=False)
            if len(messages) and messages[0]['role'] == 'system':
                ret += f"{self.system}{messages[0]['content']}{self.tools}{tool_prompt}{self.eotools}{self.eosys}"
            else:
                ret += f'{self.system}{self.meta_instruction}{self.tools}{tool_prompt}{self.eotools}{self.eosys}'
        else:
            if self.meta_instruction is not None and sequence_start:
                if len(messages) and messages[0]['role'] == 'system':
                    ret += f"{self.system}{messages[0]['content']}{self.eosys}"
                else:
                    ret += f'{self.system}{self.meta_instruction}{self.eosys}'

        for index, message in enumerate(messages):
            if (message['role'] == 'user'
                    or (message['role'] == 'system' and index != 0)
                    or (message['role'] == 'assistant'
                        and message.get('tool_calls') is None)):
                ret += f"{box_map[message['role']]}{message['content']}{self.eoh}"
            elif message['role'] == 'assistant':
                # emit '<|im_start|>assistant' without the trailing newline;
                # content and each tool call start on their own line below
                ret += self.assistant.rstrip('\n')
                if message.get('content') is not None:
                    ret += f"{self.separator}{message['content']}"
                if message.get('tool_calls') is not None:
                    for tool_call in message['tool_calls']:
                        if tool_call.get('function') is not None:
                            tool_call = tool_call['function']
                        name = tool_call['name']
                        arguments = json.dumps(tool_call['arguments'],
                                               ensure_ascii=False)
                        ret += (f'{self.separator}<tool_call>{self.separator}'
                                f'{{"name": "{name}", "arguments": {arguments}}}'
                                f'{self.separator}</tool_call>')
                ret += f'{self.eoa}{self.separator}'
            if message['role'] == 'tool':
                if index == 0 or messages[index - 1]['role'] != 'tool':
                    # open one user turn wrapping consecutive tool responses
                    ret += self.user.rstrip('\n')
                ret += (f'{self.separator}<tool_response>{self.separator}'
                        f"{message['content']}{self.separator}</tool_response>")
                if (index == len(messages) - 1
                        or messages[index + 1]['role'] != 'tool'):
                    ret += f'{self.eoh}'
        ret += f'{self.assistant}'
        return ret

    @classmethod
    def match(cls, model_path: str) -> Optional[str]:
        """Return the model_name that was registered to MODELS.

        Args:
            model_path (str): the model path used for matching.
        """
        if 'qwen2.5' in model_path.lower():
            return 'qwen2d5'


@MODELS.register_module(name='minicpmv-2d6')
@MODELS.register_module(name='minicpm3')
@MODELS.register_module(name='qwen')
@@ -927,7 +1034,8 @@ def match(cls, model_path: str) -> Optional[str]:
        Args:
            model_path (str): the model path used for matching.
        """
-        if 'qwen' in model_path.lower():
+        if ('qwen' in model_path.lower()
+                and 'qwen2.5' not in model_path.lower()):
            return 'qwen'
        if 'minicpm-v-2_6' in model_path.lower():
            return 'minicpmv-2d6'
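The added guard keeps the generic 'qwen' template from swallowing Qwen2.5 checkpoints. A hedged illustration of the intended dispatch, assuming best_match_model resolves these example paths the obvious way:

```python
from lmdeploy.model import best_match_model

# Qwen2.5 checkpoints should route to the new template...
assert best_match_model('Qwen/Qwen2.5-7B-Instruct') == 'qwen2d5'
# ...while earlier Qwen releases keep the original one.
assert best_match_model('Qwen/Qwen-7B-Chat') == 'qwen'
```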
10 changes: 10 additions & 0 deletions lmdeploy/serve/async_engine.py
@@ -652,6 +652,16 @@ def parse_tool_response(self, text, tools, **kwargs):
            action, _ = text.split('</function>')
            parameters = action[action.find('{'):]
            name = action.split('<function=')[1].split('>{')[0]
        elif '<tool_call>' in text:  # qwen2.5
            # text before the opening tag is ordinary assistant content;
            # only the first tool call is parsed
            action = text.split('</tool_call>')[0]
            action = action.split('<tool_call>')
            if len(action) == 1:
                text, action = '', action[0]
            else:
                text, action = action
            action = json.loads(action)
            name, parameters = action['name'], json.dumps(
                action.get('parameters', action.get('arguments', {})))
        else:
            raise RuntimeError(f'Unexpected model response: {text}')
        action_id = [tool.function.name for tool in tools].index(name)
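A small self-contained walk-through of the new branch on made-up response text, showing what ends up in name and parameters:

```python
import json

text = ('<tool_call>\n'
        '{"name": "get_current_weather", "arguments": {"city": "Shanghai"}}\n'
        '</tool_call>')
action = text.split('</tool_call>')[0].split('<tool_call>')[-1]
action = json.loads(action)
name = action['name']
parameters = json.dumps(action.get('parameters', action.get('arguments', {})))
assert name == 'get_current_weather'
assert json.loads(parameters) == {'city': 'Shanghai'}
```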
4 changes: 2 additions & 2 deletions lmdeploy/serve/openai/api_server.py
@@ -495,8 +495,8 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
                final_logprobs.extend(res.logprobs)

        tool_calls = None
-        if request.tool_choice != 'none' and ('<|plugin|>' in text
-                                              or '<function=' in text):
+        if request.tool_choice != 'none' and ('<|plugin|>' in text
+                                              or '<function=' in text
+                                              or '<tool_call>' in text):
            if final_res.finish_reason == 'stop':
                final_res.finish_reason = 'tool_calls'
            try:  # TODO add json_schema guidance to turbomind
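Restated as a standalone predicate (the helper name is hypothetical), the trigger check now covers all three tool-call formats:

```python
def is_tool_call_text(text: str, tool_choice: str = 'auto') -> bool:
    """Any of the three trigger tokens marks the turn as a tool call."""
    triggers = ('<|plugin|>', '<function=', '<tool_call>')
    return tool_choice != 'none' and any(t in text for t in triggers)

assert is_tool_call_text('<tool_call>\n{"name": "get_current_weather"}')
assert not is_tool_call_text('a plain text answer')
```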