From 8d6cc90fc52195da99e633ddd1e9662c6c826d8b Mon Sep 17 00:00:00 2001 From: JongRok BAEK <54343137+L-cloud@users.noreply.github.com> Date: Wed, 14 Feb 2024 06:30:53 +0900 Subject: [PATCH] langchain.core : Use shallow copy for schema manipulation in JsonOutputParser.get_format_instructions (#17162) - **Description :** Fix: Use shallow copy for schema manipulation in get_format_instructions Prevents side effects on the original schema object by using a dictionary comprehension for a safer and more controlled manipulation of schema key-value pairs, enhancing code reliability. - **Issue:** #17161 - **Dependencies:** None - **Twitter handle:** None --- libs/core/langchain_core/output_parsers/json.py | 3 ++- .../tests/unit_tests/output_parsers/test_json.py | 16 ++++++++++++++++ .../langchain/output_parsers/pydantic.py | 3 ++- libs/langchain/langchain/output_parsers/yaml.py | 3 ++- 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/output_parsers/json.py b/libs/core/langchain_core/output_parsers/json.py index d16316596a7..c107cd0c765 100644 --- a/libs/core/langchain_core/output_parsers/json.py +++ b/libs/core/langchain_core/output_parsers/json.py @@ -221,7 +221,8 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]): if self.pydantic_object is None: return "Return a JSON object." else: - schema = self.pydantic_object.schema() + # Copy schema to avoid altering original Pydantic schema. + schema = {k: v for k, v in self.pydantic_object.schema().items()} # Remove extraneous fields. reduced_schema = schema diff --git a/libs/core/tests/unit_tests/output_parsers/test_json.py b/libs/core/tests/unit_tests/output_parsers/test_json.py index 6d8cb4bee39..8cedc767699 100644 --- a/libs/core/tests/unit_tests/output_parsers/test_json.py +++ b/libs/core/tests/unit_tests/output_parsers/test_json.py @@ -8,6 +8,8 @@ from langchain_core.output_parsers.json import ( parse_json_markdown, parse_partial_json, ) +from langchain_core.pydantic_v1 import BaseModel +from langchain_core.utils.function_calling import convert_to_openai_function GOOD_JSON = """```json { @@ -579,3 +581,17 @@ def test_partial_text_json_output_parser_with_json_code_block() -> None: {"country_name": "France", "population_size": 673915}, {"country_name": "France", "population_size": 67391582}, ] + + +def test_base_model_schema_consistency() -> None: + class Joke(BaseModel): + setup: str + punchline: str + + initial_joke_schema = {k: v for k, v in Joke.schema().items()} + SimpleJsonOutputParser(pydantic_object=Joke) + openai_func = convert_to_openai_function(Joke) + retrieved_joke_schema = {k: v for k, v in Joke.schema().items()} + + assert initial_joke_schema == retrieved_joke_schema + assert openai_func.get("name", None) is not None diff --git a/libs/langchain/langchain/output_parsers/pydantic.py b/libs/langchain/langchain/output_parsers/pydantic.py index 1248560c0ca..9e415650425 100644 --- a/libs/langchain/langchain/output_parsers/pydantic.py +++ b/libs/langchain/langchain/output_parsers/pydantic.py @@ -29,7 +29,8 @@ class PydanticOutputParser(JsonOutputParser): raise OutputParserException(msg, llm_output=json_object) def get_format_instructions(self) -> str: - schema = self.pydantic_object.schema() + # Copy schema to avoid altering original Pydantic schema. + schema = {k: v for k, v in self.pydantic_object.schema().items()} # Remove extraneous fields. reduced_schema = schema diff --git a/libs/langchain/langchain/output_parsers/yaml.py b/libs/langchain/langchain/output_parsers/yaml.py index 528fc93f507..21bcf359a2c 100644 --- a/libs/langchain/langchain/output_parsers/yaml.py +++ b/libs/langchain/langchain/output_parsers/yaml.py @@ -43,7 +43,8 @@ class YamlOutputParser(BaseOutputParser[T]): raise OutputParserException(msg, llm_output=text) from e def get_format_instructions(self) -> str: - schema = self.pydantic_object.schema() + # Copy schema to avoid altering original Pydantic schema. + schema = {k: v for k, v in self.pydantic_object.schema().items()} # Remove extraneous fields. reduced_schema = schema