Skip to content

Commit

Permalink
fix: ensure that utf-8 characters are not translated into \uXXXX format (#965)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ivanleomk authored Aug 31, 2024
1 parent b96e9a3 commit 02fcfe3
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 5 deletions.
9 changes: 4 additions & 5 deletions instructor/process_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def handle_response_model(
As a genius expert, your task is to understand the content and provide
the parsed objects in json that match the following json_schema:\n
{json.dumps(response_model.model_json_schema(), indent=2)}
{json.dumps(response_model.model_json_schema(), indent=2, ensure_ascii=False)}
Make sure to return an instance of the JSON, not the schema itself
"""
Expand Down Expand Up @@ -357,16 +357,15 @@ def handle_response_model(
You must only respond in JSON format that adheres to the following schema:
<JSON_SCHEMA>
{json.dumps(response_model.model_json_schema(), indent=2)}
{json.dumps(response_model.model_json_schema(), indent=2, ensure_ascii=False)}
</JSON_SCHEMA>
"""
new_kwargs["system"] = dedent(new_kwargs["system"])
else:
new_kwargs["system"] += dedent(f"""
You must only respond in JSON format that adheres to the following schema:
<JSON_SCHEMA>
{json.dumps(response_model.model_json_schema(), indent=2)}
{json.dumps(response_model.model_json_schema(), indent=2, ensure_ascii=False)}
</JSON_SCHEMA>
""")

Expand Down Expand Up @@ -439,7 +438,7 @@ def handle_response_model(
As a genius expert, your task is to understand the content and provide
the parsed objects in json that match the following json_schema:\n
{json.dumps(response_model.model_json_schema(), indent=2)}
{json.dumps(response_model.model_json_schema(), indent=2, ensure_ascii=False)}
Make sure to return an instance of the JSON, not the schema itself
"""
Expand Down
54 changes: 54 additions & 0 deletions tests/test_response_model_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from instructor.process_response import handle_response_model
from pydantic import BaseModel, Field
import instructor
import pytest

# Modes the parametrized test below is run against; get_system_prompt knows
# where each of them keeps its prompt text.
modes = [
    instructor.Mode.ANTHROPIC_JSON,
    instructor.Mode.JSON,
    instructor.Mode.MD_JSON,
    instructor.Mode.GEMINI_JSON,
    instructor.Mode.VERTEXAI_JSON,
]


def get_system_prompt(user_tool_definition, mode):
    """Pull the prompt text for *mode* out of the prepared request kwargs.

    Args:
        user_tool_definition: Mapping of request kwargs, as returned in the
            second slot of ``handle_response_model``'s result.
        mode: The ``instructor.Mode`` the kwargs were built for.

    Returns:
        - ``ANTHROPIC_JSON``: the value stored under ``"system"``.
        - ``GEMINI_JSON``: the ``"parts"`` of the first ``"contents"`` entry,
          joined with newlines.
        - ``VERTEXAI_JSON``: ``str()`` of the ``"generation_config"`` value.
        - any other mode: the ``"content"`` of the first ``"messages"`` entry.
    """
    if mode == instructor.Mode.ANTHROPIC_JSON:
        return user_tool_definition["system"]

    if mode == instructor.Mode.GEMINI_JSON:
        first_content = user_tool_definition["contents"][0]
        return "\n".join(first_content["parts"])

    if mode == instructor.Mode.VERTEXAI_JSON:
        generation_config = user_tool_definition["generation_config"]
        return str(generation_config)

    # Every remaining mode is read from the first chat message.
    first_message = user_tool_definition["messages"][0]
    return first_message["content"]


@pytest.mark.parametrize("mode", modes)
def test_json_preserves_description_of_non_english_characters_in_json_mode(
    mode,
) -> None:
    """Non-ASCII field descriptions must show up verbatim in the prompt.

    For each mode the response model is processed twice -- first without and
    then with a caller-supplied ``system`` argument -- and both Chinese field
    descriptions are expected to be present, unescaped, in the prompt text
    returned by ``get_system_prompt``.
    """
    messages = [
        {
            "role": "user",
            "content": "Extract the user from the text : 张三 20岁",
        }
    ]

    # Intentionally no class docstring: pydantic would copy it into the
    # generated JSON schema as the model description.
    class User(BaseModel):
        name: str = Field(description="用户的名字")
        age: int = Field(description="用户的年龄")

    expected_descriptions = ("用户的名字", "用户的年龄")

    # First pass: no extra kwargs. Second pass: an explicit system prompt.
    # NOTE(review): the same `messages` list object is handed to both calls;
    # if handle_response_model mutates it in place, the second call starts
    # from the already-modified list -- confirm this is intended.
    for extra_kwargs in ({}, {"system": "你是一个AI助手"}):
        _, user_tool_definition = handle_response_model(
            User, mode=mode, **extra_kwargs, messages=messages
        )
        system_prompt = get_system_prompt(user_tool_definition, mode)
        for description in expected_descriptions:
            assert description in system_prompt

0 comments on commit 02fcfe3

Please sign in to comment.