From 3cfb02f2fda3e85f6ae1b7b0b6db280a259e0277 Mon Sep 17 00:00:00 2001
From: gaozixiang
Date: Mon, 11 Nov 2024 15:30:03 +0800
Subject: [PATCH] feat: support qwen2.5 function_call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lmdeploy/model.py                   | 112 ++++++++++++++++++++++++++--
 lmdeploy/serve/async_engine.py      |   5 ++
 lmdeploy/serve/openai/api_server.py |   3 +-
 3 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/lmdeploy/model.py b/lmdeploy/model.py
index 2b3a0a4e1..5f9302c0f 100644
--- a/lmdeploy/model.py
+++ b/lmdeploy/model.py
@@ -891,6 +891,110 @@ def match(cls, model_path: str) -> Optional[str]:
         ) or 'llama3.2-' in model_path.lower():
             return 'llama3_1'
 
 
+@MODELS.register_module(name='qwen2d5')
+class Qwen2halfChat(BaseChatTemplate):
+    """Chat template for Qwen2.5-Instruct."""
+
+    def __init__(self,
+                 system='<|im_start|>system\n',
+                 meta_instruction='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.',
+                 eosys='<|im_end|>\n',
+                 user='<|im_start|>user\n',
+                 eoh='<|im_end|>\n',
+                 assistant='<|im_start|>assistant\n',
+                 eoa='<|im_end|>',
+                 separator='\n',
+                 tools='\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>',
+                 eotools='\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call><|im_end|>\n',
+                 stop_words=['<|im_end|>'],
+                 **kwargs):
+        self.tools = tools
+        self.eotools = eotools
+        super().__init__(system=system,
+                         meta_instruction=meta_instruction,
+                         eosys=eosys,
+                         user=user,
+                         eoh=eoh,
+                         assistant=assistant,
+                         eoa=eoa,
+                         separator=separator,
+                         stop_words=stop_words,
+                         **kwargs)
+
+    def messages2prompt(self, messages, sequence_start=True, tools=None,
+                        **kwargs):
+        """Return the prompt that is concatenated with other elements in the
+        chat template.
+
+        Args:
+            messages (str | List): user's input prompt
+        Returns:
+            str: the concatenated prompt
+        """
+        if isinstance(messages, str):
+            return self.get_prompt(messages, sequence_start)
+        box_map = dict(user=self.user,
+                       assistant=self.assistant,
+                       system=self.system)
+        eox_map = dict(user=self.eoh,
+                       assistant=self.eoa + self.separator,
+                       system=self.eosys)
+        ret = ''
+        tool_prompt = ''
+        if tools is not None:
+            for tool in tools:
+                tool_prompt += '\n' + json.dumps(tool, ensure_ascii=False)
+            if len(messages) and messages[0]['role'] == 'system':
+                ret += f"{self.system}{messages[0]['content']}{self.tools}{tool_prompt}{self.eotools}"
+            else:
+                ret += f"{self.system}{self.meta_instruction}{self.tools}{tool_prompt}{self.eotools}"
+        else:
+            if self.meta_instruction is not None and sequence_start:
+                if len(messages) and messages[0]['role'] == 'system':
+                    ret += f"{self.system}{messages[0]['content']}{self.eosys}"
+                else:
+                    ret += f"{self.system}{self.meta_instruction}{self.eosys}"
+
+        for index, message in enumerate(messages):
+            if (message['role'] == 'user'
+                    or (message['role'] == 'system' and index != 0)
+                    or (message['role'] == 'assistant'
+                        and message.get('tool_calls') is None)):
+                ret += f"{box_map[message['role']]}{message['content']}{eox_map[message['role']]}"
+            elif message['role'] == 'assistant':
+                ret += f"{box_map[message['role']]}"
+                if message['content'] is not None:
+                    ret += f"{message['content']}"
+                for tool_call in message.get('tool_calls') or []:
+                    if tool_call.get('function') is not None:
+                        tool_call = tool_call['function']
+                    args = tool_call['arguments']
+                    if not isinstance(args, str):
+                        args = json.dumps(args, ensure_ascii=False)
+                    ret += f'\n<tool_call>\n{{"name": "{tool_call["name"]}", "arguments": {args}}}\n</tool_call>'
+                ret += f'{self.eoa}{self.separator}'
+            elif message['role'] == 'tool':
+                if index == 0 or messages[index - 1]['role'] != 'tool':
+                    ret += f'{self.user}'
+                else:
+                    ret += f'{self.separator}'
+                ret += f"<tool_response>\n{message['content']}\n</tool_response>"
+                if index == len(messages) - 1 or messages[index + 1]['role'] != 'tool':
+                    ret += f'{self.eoh}'
+        ret += f'{self.assistant}'
+        return ret
+
+    @classmethod
+    def match(cls, model_path: str) -> Optional[str]:
+        """Return the model_name that was registered to MODELS.
+
+        Args:
+            model_path (str): the model path used for matching.
+        """
+        if 'qwen' in model_path.lower() and 'qwen2.5' not in model_path.lower():
+            return 'qwen'
+        if 'minicpm-v-2_6' in model_path.lower():
+            return 'minicpmv-2d6'
+        if 'minicpm3-' in model_path.lower():
+            return 'minicpm3'
+
+
 @MODELS.register_module(name='minicpmv-2d6')
 @MODELS.register_module(name='minicpm3')
@@ -927,12 +1031,8 @@ def match(cls, model_path: str) -> Optional[str]:
 
         Args:
            model_path (str): the model path used for matching.
""" - if 'qwen' in model_path.lower(): - return 'qwen' - if 'minicpm-v-2_6' in model_path.lower(): - return 'minicpmv-2d6' - if 'minicpm3-' in model_path.lower(): - return 'minicpm3' + if 'qwen2.5' in model_path.lower(): + return 'qwen2d5' @MODELS.register_module(name='codellama') diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py index 3c8f193cd..5fd544678 100644 --- a/lmdeploy/serve/async_engine.py +++ b/lmdeploy/serve/async_engine.py @@ -652,6 +652,11 @@ def parse_tool_response(self, text, tools, **kwargs): action, _ = text.split('') parameters = action[action.find('{'):] name = action.split('{')[0] + elif '' in text: #qwen2.5 + action,text = text.split('') + action = action.split('')[1] + action = json.loads(action) + name, parameters = action['name'], json.dumps(action.get('parameters', action.get('arguments', {}))) else: raise RuntimeError(f'Unexpected model response: {text}') action_id = [tool.function.name for tool in tools].index(name) diff --git a/lmdeploy/serve/openai/api_server.py b/lmdeploy/serve/openai/api_server.py index a12cadaa7..e0708dec0 100644 --- a/lmdeploy/serve/openai/api_server.py +++ b/lmdeploy/serve/openai/api_server.py @@ -496,7 +496,8 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]: tool_calls = None if request.tool_choice != 'none' and ('<|plugin|>' in text - or '' in text): if final_res.finish_reason == 'stop': final_res.finish_reason = 'tool_calls' try: # TODO add json_schema guidance to turbomind