From dc51cc569036bc4a5f510f87437d91bcc5f3dc25 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Wed, 14 Aug 2024 09:54:31 -0400
Subject: [PATCH] core[minor]: Prevent PydanticOutputParser from encoding schema as ASCII (#25386)

This allows users to provide parameter descriptions in the pydantic models in other languages.

Continuing this PR: https://github.com/langchain-ai/langchain/pull/24809
---
 .../langchain_core/output_parsers/pydantic.py |  2 +-
 .../output_parsers/test_pydantic_parser.py    | 21 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/libs/core/langchain_core/output_parsers/pydantic.py b/libs/core/langchain_core/output_parsers/pydantic.py
index b48dca9d28a..b8f782ffa2f 100644
--- a/libs/core/langchain_core/output_parsers/pydantic.py
+++ b/libs/core/langchain_core/output_parsers/pydantic.py
@@ -92,7 +92,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
         if "type" in reduced_schema:
             del reduced_schema["type"]
         # Ensure json in context is well-formed with double quotes.
-        schema_str = json.dumps(reduced_schema)
+        schema_str = json.dumps(reduced_schema, ensure_ascii=False)
 
         return _PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
 
diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py b/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
index f8c7d42c709..d390ee541b6 100644
--- a/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
+++ b/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
@@ -100,3 +100,24 @@ def test_pydantic_output_parser_type_inference() -> None:
         "title": "SampleModel",
         "type": "object",
     }
+
+
+def test_format_instructions_preserves_language() -> None:
+    """Test format instructions does not attempt to encode into ascii."""
+    from langchain_core.pydantic_v1 import BaseModel, Field
+
+    description = (
+        "你好, こんにちは, नमस्ते, Bonjour, Hola, "
+        "Olá, 안녕하세요, Jambo, Merhaba, Γειά σου"
+    )
+
+    class Foo(BaseModel):
+        hello: str = Field(
+            description=(
+                "你好, こんにちは, नमस्ते, Bonjour, Hola, "
+                "Olá, 안녕하세요, Jambo, Merhaba, Γειά σου"
+            )
+        )
+
+    parser = PydanticOutputParser(pydantic_object=Foo)  # type: ignore
+    assert description in parser.get_format_instructions()