# Patch summary (preamble text placed before the first "diff --git" header):
#   * libs/core/langchain_core/output_parsers/json.py -- in the JsonOutputParser hunk, the
#     schema embedded in JSON_FORMAT_INSTRUCTIONS is now serialized with
#     json.dumps(reduced_schema, ensure_ascii=False) instead of json.dumps(reduced_schema),
#     so non-ASCII text in the schema is written as-is rather than as \uXXXX escapes.
#   * libs/core/tests/unit_tests/output_parsers/test_json.py -- imports pydantic.Field and adds
#     test_unicode_handling(), which builds a model whose field description is Chinese text and
#     asserts that the exact text appears in parser.get_format_instructions().
# NOTE(review): the patch below has had its line breaks collapsed into single spaces; the
# original indentation and blank context lines are not recoverable from this text alone, so it
# is left untouched here and must be re-exported from the repository before it can be applied.
diff --git a/libs/core/langchain_core/output_parsers/json.py b/libs/core/langchain_core/output_parsers/json.py index e9d3669e44e..18c1257a9ad 100644 --- a/libs/core/langchain_core/output_parsers/json.py +++ b/libs/core/langchain_core/output_parsers/json.py @@ -115,7 +115,7 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]): if "type" in reduced_schema: del reduced_schema["type"] # Ensure json in context is well-formed with double quotes. - schema_str = json.dumps(reduced_schema) + schema_str = json.dumps(reduced_schema, ensure_ascii=False) return JSON_FORMAT_INSTRUCTIONS.format(schema=schema_str) @property diff --git a/libs/core/tests/unit_tests/output_parsers/test_json.py b/libs/core/tests/unit_tests/output_parsers/test_json.py index 96cf6d0cc4d..326cfc16cd9 100644 --- a/libs/core/tests/unit_tests/output_parsers/test_json.py +++ b/libs/core/tests/unit_tests/output_parsers/test_json.py @@ -3,7 +3,7 @@ from collections.abc import AsyncIterator, Iterator from typing import Any import pytest -from pydantic import BaseModel +from pydantic import BaseModel, Field from langchain_core.exceptions import OutputParserException from langchain_core.output_parsers.json import ( @@ -603,3 +603,16 @@ def test_base_model_schema_consistency() -> None: assert initial_joke_schema == retrieved_joke_schema assert openai_func.get("name", None) is not None + + +def test_unicode_handling() -> None: + """Tests if the JsonOutputParser is able to process unicodes.""" + + class Sample(BaseModel): + title: str = Field(description="科学文章的标题") + + parser = SimpleJsonOutputParser(pydantic_object=Sample) + format_instructions = parser.get_format_instructions() + assert ( + "科学文章的标题" in format_instructions + ), "Unicode characters should not be escaped"