diff --git a/libs/core/langchain_core/output_parsers/pydantic.py b/libs/core/langchain_core/output_parsers/pydantic.py
index b48dca9d28a..b8f782ffa2f 100644
--- a/libs/core/langchain_core/output_parsers/pydantic.py
+++ b/libs/core/langchain_core/output_parsers/pydantic.py
@@ -92,7 +92,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
         if "type" in reduced_schema:
             del reduced_schema["type"]
         # Ensure json in context is well-formed with double quotes.
-        schema_str = json.dumps(reduced_schema)
+        schema_str = json.dumps(reduced_schema, ensure_ascii=False)
 
         return _PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
 
diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py b/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
index f8c7d42c709..d390ee541b6 100644
--- a/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
+++ b/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
@@ -100,3 +100,21 @@ def test_pydantic_output_parser_type_inference() -> None:
         "title": "SampleModel",
         "type": "object",
     }
+
+
+def test_format_instructions_preserves_language() -> None:
+    """Test that format instructions keep non-ASCII text unescaped."""
+    from langchain_core.pydantic_v1 import BaseModel, Field
+
+    description = (
+        "你好, こんにちは, नमस्ते, Bonjour, Hola, "
+        "Olá, 안녕하세요, Jambo, Merhaba, Γειά σου"
+    )
+
+    class Foo(BaseModel):
+        # Reuse the same string so the assertion below checks exactly
+        # what was passed to the field.
+        hello: str = Field(description=description)
+
+    parser = PydanticOutputParser(pydantic_object=Foo)  # type: ignore
+    assert description in parser.get_format_instructions()