diff --git a/lmdeploy/model.py b/lmdeploy/model.py
index 6eec89b7b4..1916a77870 100644
--- a/lmdeploy/model.py
+++ b/lmdeploy/model.py
@@ -891,27 +891,29 @@ def match(cls, model_path: str) -> Optional[str]:
) or 'llama3.2-' in model_path.lower():
return 'llama3_1'
+
@MODELS.register_module(name='qwen2d5')
class Qwen2halfChat(BaseChatTemplate):
"""Chat template for Qwen-7B-Chat."""
- def __init__(self,
- system='<|im_start|>system\n',
- meta_instruction='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.',
- eosys='<|im_end|>\n',
- user='<|im_start|>user\n',
- eoh='<|im_end|>\n',
- assistant='<|im_start|>assistant\n',
- eoa='<|im_end|>',
- separator='\n',
- tools="""
+ def __init__(
+ self,
+ system='<|im_start|>system\n',
+ meta_instruction='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.',
+ eosys='<|im_end|>\n',
+ user='<|im_start|>user\n',
+ eoh='<|im_end|>\n',
+ assistant='<|im_start|>assistant\n',
+ eoa='<|im_end|>',
+ separator='\n',
+ tools="""
\n\n#Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\
""",
- eotools="""
+ eotools="""
\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call><|im_end|>\
""",
- stop_words=['<|im_end|>'],
- **kwargs):
+ stop_words=['<|im_end|>'],
+ **kwargs):
self.tools = tools
self.eotools = eotools
@@ -925,11 +927,15 @@ def __init__(self,
separator=separator,
stop_words=stop_words,
**kwargs)
- def messages2prompt(self, messages, sequence_start=True,
- tools=None, **kwargs):
+ def messages2prompt(self,
+ messages,
+ sequence_start=True,
+ tools=None,
+ **kwargs):
"""Return the prompt that is concatenated with other elements in the
chat template.
+
Args:
messages (str | List): user's input prompt
Returns:
@@ -948,16 +954,19 @@ def messages2prompt(self, messages, sequence_start=True,
if len(messages) and messages[0]['role'] == 'system':
ret += f"{messages[0]['content']}{self.tools}{tool_prompt}{self.eotools}{self.meta_instruction}{self.eosys}"
else:
- ret += f"{self.system}{self.meta_instruction}{self.tools}{tool_prompt}{self.eotools}{self.eosys}"
+ ret += f'{self.system}{self.meta_instruction}{self.tools}{tool_prompt}{self.eotools}{self.eosys}'
else:
if self.meta_instruction is not None and sequence_start:
if len(messages) and messages[0]['role'] == 'system':
ret += f"{self.system}{messages[0]['content']}{self.eosys}"
else:
- ret += f"{self.system}{self.meta_instruction}{self.eosys}"
+ ret += f'{self.system}{self.meta_instruction}{self.eosys}'
for index, message in enumerate(messages):
- if (message['role'] == 'user' or (message['role'] == 'system' and index != 0) or (message['role'] == 'assistant' and message.get('tool_calls') is not None)):
+ if (message['role'] == 'user'
+ or (message['role'] == 'system' and index != 0)
+ or (message['role'] == 'assistant'
+ and message.get('tool_calls') is not None)):
ret += f"{box_map[message['role']]}\n{message['content']}\n{self.eoh}"
if message['role'] == 'assistant':
ret += f"{box_map[message['role']]}"
@@ -971,10 +980,11 @@ def messages2prompt(self, messages, sequence_start=True,
ret += f'\n\n{{"name": "{toolCall["name"]}, "arguments": {json.dumps(tools["arguments"])}"\n}}'
if message['role'] == 'tool':
if index == 0 or messages[index - 1]['role'] != 'tool':
- ret += f"{self.user}"
+ ret += f'{self.user}'
ret += f"\n\n{message['content']}\n"
- if index == len(messages) - 1 or messages[index + 1]['role'] != 'tool':
- ret += f"{self.eoh}"
+ if index == len(messages) - 1 or messages[index +
+ 1]['role'] != 'tool':
+ ret += f'{self.eoh}'
ret += f'{self.assistant}'
return ret
@@ -988,6 +998,7 @@ def match(cls, model_path: str) -> Optional[str]:
if 'qwen2.5' in model_path.lower():
return 'qwen2d5'
+
@MODELS.register_module(name='minicpmv-2d6')
@MODELS.register_module(name='minicpm3')
@MODELS.register_module(name='qwen')
@@ -1023,7 +1034,8 @@ def match(cls, model_path: str) -> Optional[str]:
Args:
model_path (str): the model path used for matching.
"""
- if 'qwen' in model_path.lower() and 'qwen2.5' not in model_path.lower():
+ if 'qwen' in model_path.lower() and 'qwen2.5' not in model_path.lower(
+ ):
return 'qwen'
if 'minicpm-v-2_6' in model_path.lower():
return 'minicpmv-2d6'
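
Usage sketch (illustrative, not part of the patch): the qwen2d5 template registered above can be rendered directly through lmdeploy's chat-template registry. A minimal sketch, assuming MODELS.get('qwen2d5') resolves to Qwen2halfChat as registered in this file; the tool schema and the user message are invented for illustration:

    # Render a tool-calling conversation with the qwen2d5 template.
    # The tool definition and the message below are hypothetical.
    from lmdeploy.model import MODELS

    template = MODELS.get('qwen2d5')()
    tools = [{
        'name': 'get_weather',
        'description': 'Look up the current weather for a city.',
        'parameters': {
            'type': 'object',
            'properties': {'city': {'type': 'string'}},
            'required': ['city'],
        },
    }]
    messages = [{'role': 'user', 'content': 'What is the weather in Shanghai?'}]

    # Expected shape: a system block wrapping the tool JSON in <tools></tools>,
    # the user turn, then a trailing '<|im_start|>assistant\n' ready for generation.
    print(template.messages2prompt(messages, tools=tools))
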
diff --git a/lmdeploy/serve/openai/api_server.py b/lmdeploy/serve/openai/api_server.py
index e0708dec08..3a90f7e634 100644
--- a/lmdeploy/serve/openai/api_server.py
+++ b/lmdeploy/serve/openai/api_server.py
@@ -495,9 +495,8 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
final_logprobs.extend(res.logprobs)
tool_calls = None
- if request.tool_choice != 'none' and ('<|plugin|>' in text
- or '<function=' in text):
+ if request.tool_choice != 'none' and ('<|plugin|>' in text or '<function=' in text):
if final_res.finish_reason == 'stop':
final_res.finish_reason = 'tool_calls'
try: # TODO add json_schema guidance to turbomind
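
End-to-end sketch (illustrative, not part of the patch): the api_server change only affects how a finished generation is parsed into tool calls. Assuming a server started with something like "lmdeploy serve api_server Qwen/Qwen2.5-7B-Instruct" on the default port 23333, the branch above can be exercised with the OpenAI client; the model name and tool schema are placeholders:

    # Exercise the tool-call branch of api_server via the OpenAI client.
    # Server address, model name, and the tool schema are placeholders.
    from openai import OpenAI

    client = OpenAI(base_url='http://0.0.0.0:23333/v1', api_key='dummy')
    resp = client.chat.completions.create(
        model='Qwen2.5-7B-Instruct',
        messages=[{'role': 'user', 'content': 'What is the weather in Shanghai?'}],
        tools=[{
            'type': 'function',
            'function': {
                'name': 'get_weather',
                'description': 'Look up the current weather for a city.',
                'parameters': {
                    'type': 'object',
                    'properties': {'city': {'type': 'string'}},
                    'required': ['city'],
                },
            },
        }],
        tool_choice='auto',  # 'none' bypasses the '<|plugin|>' / '<function=' check
    )
    # When the model emits a tool call, finish_reason is rewritten to 'tool_calls'.
    print(resp.choices[0].finish_reason, resp.choices[0].message.tool_calls)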