diff --git a/libs/core/langchain_core/output_parsers/json.py b/libs/core/langchain_core/output_parsers/json.py index 6500f778d64..d16316596a7 100644 --- a/libs/core/langchain_core/output_parsers/json.py +++ b/libs/core/langchain_core/output_parsers/json.py @@ -35,7 +35,7 @@ def _custom_parser(multiline_string: str) -> str: multiline_string = multiline_string.decode() multiline_string = re.sub( - r'("action_input"\:\s*")(.*)(")', + r'("action_input"\:\s*")(.*?)(")', _replace_new_line, multiline_string, flags=re.DOTALL, @@ -138,7 +138,7 @@ def parse_json_markdown( The parsed JSON object as a Python dictionary. """ # Try to find JSON string within triple backticks - match = re.search(r"```(json)?(.*)(```)?", json_string, re.DOTALL) + match = re.search(r"```(json)?(.*)", json_string, re.DOTALL) # If no match found, assume the entire string is a JSON string if match is None: @@ -148,7 +148,7 @@ def parse_json_markdown( json_str = match.group(2) # Strip whitespace and newlines from the start and end - json_str = json_str.strip() + json_str = json_str.strip().strip("`") # handle newlines and other special characters inside the returned value json_str = _custom_parser(json_str) @@ -211,7 +211,8 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]): try: return parse_json_markdown(text) except JSONDecodeError as e: - raise OutputParserException(f"Invalid json output: {text}") from e + msg = f"Invalid json output: {text}" + raise OutputParserException(msg, llm_output=text) from e def parse(self, text: str) -> Any: return self.parse_result([Generation(text=text)]) diff --git a/libs/core/tests/unit_tests/output_parsers/test_json.py b/libs/core/tests/unit_tests/output_parsers/test_json.py index 5559768bb07..716ccab48dd 100644 --- a/libs/core/tests/unit_tests/output_parsers/test_json.py +++ b/libs/core/tests/unit_tests/output_parsers/test_json.py @@ -70,21 +70,7 @@ JSON_WITH_MARKDOWN_CODE_BLOCK = """```json JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json { 
"action": "Final Answer", - "action_input": "```bar\n
\n\ttext\n
```" -} -```""" - -JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json -{ - "action": "Final Answer", - "action_input": "{"foo": "bar", "bar": "foo"}" -} -```""" - -JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json -{ - "action": "Final Answer", - "action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}" + "action_input": "```bar\n
\n\ttext\n
```" } ```""" @@ -202,6 +188,8 @@ def test_parse_json_with_code_blocks() -> None: parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK) assert parsed == {"foo": "```bar```"} + +def test_parse_json_with_code_blocks_and_newlines() -> None: parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES) assert parsed == { @@ -211,8 +199,6 @@ def test_parse_json_with_code_blocks() -> None: TEST_CASES_ESCAPED_QUOTES = [ - JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON, - JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON, JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON, ] diff --git a/libs/langchain/langchain/chains/openai_functions/__init__.py b/libs/langchain/langchain/chains/openai_functions/__init__.py index 8be27606d7a..b6268422451 100644 --- a/libs/langchain/langchain/chains/openai_functions/__init__.py +++ b/libs/langchain/langchain/chains/openai_functions/__init__.py @@ -1,10 +1,7 @@ from langchain.chains.openai_functions.base import ( convert_to_openai_function, create_openai_fn_chain, - create_openai_fn_runnable, create_structured_output_chain, - create_structured_output_runnable, - get_openai_output_parser, ) from langchain.chains.openai_functions.citation_fuzzy_match import ( create_citation_fuzzy_match_chain, @@ -21,6 +18,11 @@ from langchain.chains.openai_functions.tagging import ( create_tagging_chain, create_tagging_chain_pydantic, ) +from langchain.chains.structured_output.base import ( + create_openai_fn_runnable, + create_structured_output_runnable, + get_openai_output_parser, +) __all__ = [ "convert_to_openai_function", @@ -33,7 +35,7 @@ __all__ = [ "create_qa_with_sources_chain", "create_structured_output_chain", "create_openai_fn_chain", - "create_structured_output_runnable", - "create_openai_fn_runnable", - "get_openai_output_parser", + "create_structured_output_runnable", # backwards compatibility + "create_openai_fn_runnable", # backwards compatibility + "get_openai_output_parser", # backwards compatibility ] diff --git 
a/libs/langchain/langchain/chains/openai_functions/base.py b/libs/langchain/langchain/chains/openai_functions/base.py index 7f8fdd4431b..2cef8ac74ce 100644 --- a/libs/langchain/langchain/chains/openai_functions/base.py +++ b/libs/langchain/langchain/chains/openai_functions/base.py @@ -12,229 +12,34 @@ from typing import ( from langchain_core._api import deprecated from langchain_core.language_models import BaseLanguageModel from langchain_core.output_parsers import ( - BaseGenerationOutputParser, BaseLLMOutputParser, - BaseOutputParser, ) from langchain_core.prompts import BasePromptTemplate from langchain_core.pydantic_v1 import BaseModel -from langchain_core.runnables import Runnable from langchain_core.utils.function_calling import ( PYTHON_TO_JSON_TYPES, convert_to_openai_function, ) from langchain.chains import LLMChain +from langchain.chains.structured_output.base import ( + create_openai_fn_runnable, + create_structured_output_runnable, + get_openai_output_parser, +) from langchain.output_parsers.openai_functions import ( - JsonOutputFunctionsParser, PydanticAttrOutputFunctionsParser, - PydanticOutputFunctionsParser, ) - -def get_openai_output_parser( - functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]], -) -> Union[BaseOutputParser, BaseGenerationOutputParser]: - """Get the appropriate function output parser given the user functions. - - Args: - functions: Sequence where element is a dictionary, a pydantic.BaseModel class, - or a Python function. If a dictionary is passed in, it is assumed to - already be a valid OpenAI function. - - Returns: - A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise - a JsonOutputFunctionsParser. If there's only one function and it is - not a Pydantic class, then the output parser will automatically extract - only the function arguments and not the function name. 
- """ - function_names = [convert_to_openai_function(f)["name"] for f in functions] - if isinstance(functions[0], type) and issubclass(functions[0], BaseModel): - if len(functions) > 1: - pydantic_schema: Union[Dict, Type[BaseModel]] = { - name: fn for name, fn in zip(function_names, functions) - } - else: - pydantic_schema = functions[0] - output_parser: Union[ - BaseOutputParser, BaseGenerationOutputParser - ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema) - else: - output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1) - return output_parser - - -def create_openai_fn_runnable( - functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]], - llm: Runnable, - prompt: BasePromptTemplate, - *, - enforce_single_function_usage: bool = True, - output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, - **kwargs: Any, -) -> Runnable: - """Create a runnable sequence that uses OpenAI functions. - - Args: - functions: A sequence of either dictionaries, pydantic.BaseModels classes, or - Python functions. If dictionaries are passed in, they are assumed to - already be a valid OpenAI functions. If only a single - function is passed in, then it will be enforced that the model use that - function. pydantic.BaseModels and Python functions should have docstrings - describing what the function does. For best results, pydantic.BaseModels - should have descriptions of the parameters and Python functions should have - Google Python style args descriptions in the docstring. Additionally, - Python functions should only use primitive types (str, int, float, bool) or - pydantic.BaseModels for arguments. - llm: Language model to use, assumed to support the OpenAI function-calling API. - prompt: BasePromptTemplate to pass to the model. - enforce_single_function_usage: only used if a single function is passed in. If - True, then the model will be forced to use the given function. 
If False, - then the model will be given the option to use the given function or not. - output_parser: BaseLLMOutputParser to use for parsing model outputs. By default - will be inferred from the function types. If pydantic.BaseModels are passed - in, then the OutputParser will try to parse outputs using those. Otherwise - model outputs will simply be parsed as JSON. If multiple functions are - passed in and they are not pydantic.BaseModels, the chain output will - include both the name of the function that was returned and the arguments - to pass to the function. - - Returns: - A runnable sequence that will pass in the given functions to the model when run. - - Example: - .. code-block:: python - - from typing import Optional - - from langchain.chains.openai_functions import create_openai_fn_runnable - from langchain_community.chat_models import ChatOpenAI - from langchain_core.prompts import ChatPromptTemplate - from langchain_core.pydantic_v1 import BaseModel, Field - - - class RecordPerson(BaseModel): - \"\"\"Record some identifying information about a person.\"\"\" - - name: str = Field(..., description="The person's name") - age: int = Field(..., description="The person's age") - fav_food: Optional[str] = Field(None, description="The person's favorite food") - - - class RecordDog(BaseModel): - \"\"\"Record some identifying information about a dog.\"\"\" - - name: str = Field(..., description="The dog's name") - color: str = Field(..., description="The dog's color") - fav_food: Optional[str] = Field(None, description="The dog's favorite food") - - - llm = ChatOpenAI(model="gpt-4", temperature=0) - prompt = ChatPromptTemplate.from_messages( - [ - ("system", "You are a world class algorithm for recording entities."), - ("human", "Make calls to the relevant function to record the entities in the following input: {input}"), - ("human", "Tip: Make sure to answer in the correct format"), - ] - ) - chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, 
prompt) - chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"}) - # -> RecordDog(name="Harry", color="brown", fav_food="chicken") - """ # noqa: E501 - if not functions: - raise ValueError("Need to pass in at least one function. Received zero.") - openai_functions = [convert_to_openai_function(f) for f in functions] - llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs} - if len(openai_functions) == 1 and enforce_single_function_usage: - llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]} - output_parser = output_parser or get_openai_output_parser(functions) - return prompt | llm.bind(**llm_kwargs) | output_parser - - -def create_structured_output_runnable( - output_schema: Union[Dict[str, Any], Type[BaseModel]], - llm: Runnable, - prompt: BasePromptTemplate, - *, - output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, - **kwargs: Any, -) -> Runnable: - """Create a runnable that uses an OpenAI function to get a structured output. - - Args: - output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary - is passed in, it's assumed to already be a valid JsonSchema. - For best results, pydantic.BaseModels should have docstrings describing what - the schema represents and descriptions for the parameters. - llm: Language model to use, assumed to support the OpenAI function-calling API. - prompt: BasePromptTemplate to pass to the model. - output_parser: BaseLLMOutputParser to use for parsing model outputs. By default - will be inferred from the function types. If pydantic.BaseModels are passed - in, then the OutputParser will try to parse outputs using those. Otherwise - model outputs will simply be parsed as JSON. - - Returns: - A runnable sequence that will pass the given function to the model when run. - - Example: - .. 
code-block:: python - - from typing import Optional - - from langchain.chains.openai_functions import create_structured_output_runnable - from langchain_community.chat_models import ChatOpenAI - from langchain_core.prompts import ChatPromptTemplate - from langchain_core.pydantic_v1 import BaseModel, Field - - class Dog(BaseModel): - \"\"\"Identifying information about a dog.\"\"\" - - name: str = Field(..., description="The dog's name") - color: str = Field(..., description="The dog's color") - fav_food: Optional[str] = Field(None, description="The dog's favorite food") - - llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0) - prompt = ChatPromptTemplate.from_messages( - [ - ("system", "You are a world class algorithm for extracting information in structured formats."), - ("human", "Use the given format to extract information from the following input: {input}"), - ("human", "Tip: Make sure to answer in the correct format"), - ] - ) - chain = create_structured_output_runnable(Dog, llm, prompt) - chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"}) - # -> Dog(name="Harry", color="brown", fav_food="chicken") - """ # noqa: E501 - if isinstance(output_schema, dict): - function: Any = { - "name": "output_formatter", - "description": ( - "Output formatter. Should always be used to format your response to the" - " user." - ), - "parameters": output_schema, - } - else: - - class _OutputFormatter(BaseModel): - """Output formatter. 
Should always be used to format your response to the user.""" # noqa: E501 - - output: output_schema # type: ignore - - function = _OutputFormatter - output_parser = output_parser or PydanticAttrOutputFunctionsParser( - pydantic_schema=_OutputFormatter, attr_name="output" - ) - return create_openai_fn_runnable( - [function], - llm, - prompt, - output_parser=output_parser, - **kwargs, - ) - - -""" --- Legacy --- """ +__all__ = [ + "get_openai_output_parser", + "create_openai_fn_runnable", + "create_structured_output_runnable", + "create_openai_fn_chain", # deprecated + "create_structured_output_chain", # deprecated + "PYTHON_TO_JSON_TYPES", # backwards compatibility + "convert_to_openai_function", # backwards compatibility +] @deprecated(since="0.1.1", removal="0.2.0", alternative="create_openai_fn_runnable") @@ -426,14 +231,3 @@ def create_structured_output_chain( output_parser=output_parser, **kwargs, ) - - -__all__ = [ - "create_openai_fn_chain", - "create_openai_fn_runnable", - "create_structured_output_chain", - "create_structured_output_runnable", - "get_openai_output_parser", - "PYTHON_TO_JSON_TYPES", - "convert_to_openai_function", -] diff --git a/libs/langchain/langchain/chains/structured_output/__init__.py b/libs/langchain/langchain/chains/structured_output/__init__.py new file mode 100644 index 00000000000..d387a7cbe4e --- /dev/null +++ b/libs/langchain/langchain/chains/structured_output/__init__.py @@ -0,0 +1,6 @@ +from langchain.chains.structured_output.base import ( + create_openai_fn_runnable, + create_structured_output_runnable, +) + +__all__ = ["create_structured_output_runnable", "create_openai_fn_runnable"] diff --git a/libs/langchain/langchain/chains/structured_output/base.py b/libs/langchain/langchain/chains/structured_output/base.py new file mode 100644 index 00000000000..d825c1fe7c8 --- /dev/null +++ b/libs/langchain/langchain/chains/structured_output/base.py @@ -0,0 +1,321 @@ +import json +from typing import Any, Callable, Dict, Literal, 
Optional, Sequence, Type, Union + +from langchain_core.output_parsers import ( + BaseGenerationOutputParser, + BaseOutputParser, + JsonOutputParser, +) +from langchain_core.prompts import BasePromptTemplate +from langchain_core.pydantic_v1 import BaseModel +from langchain_core.runnables import Runnable +from langchain_core.utils.function_calling import convert_to_openai_function + +from langchain.output_parsers import PydanticOutputParser +from langchain.output_parsers.openai_functions import ( + JsonOutputFunctionsParser, + PydanticAttrOutputFunctionsParser, + PydanticOutputFunctionsParser, +) + + +def create_openai_fn_runnable( + functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]], + llm: Runnable, + prompt: BasePromptTemplate, + *, + enforce_single_function_usage: bool = True, + output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, + **kwargs: Any, +) -> Runnable: + """Create a runnable sequence that uses OpenAI functions. + + Args: + functions: A sequence of either dictionaries, pydantic.BaseModels classes, or + Python functions. If dictionaries are passed in, they are assumed to + already be a valid OpenAI functions. If only a single + function is passed in, then it will be enforced that the model use that + function. pydantic.BaseModels and Python functions should have docstrings + describing what the function does. For best results, pydantic.BaseModels + should have descriptions of the parameters and Python functions should have + Google Python style args descriptions in the docstring. Additionally, + Python functions should only use primitive types (str, int, float, bool) or + pydantic.BaseModels for arguments. + llm: Language model to use, assumed to support the OpenAI function-calling API. + prompt: BasePromptTemplate to pass to the model. + enforce_single_function_usage: only used if a single function is passed in. If + True, then the model will be forced to use the given function. 
If False, + then the model will be given the option to use the given function or not. + output_parser: BaseLLMOutputParser to use for parsing model outputs. By default + will be inferred from the function types. If pydantic.BaseModels are passed + in, then the OutputParser will try to parse outputs using those. Otherwise + model outputs will simply be parsed as JSON. If multiple functions are + passed in and they are not pydantic.BaseModels, the chain output will + include both the name of the function that was returned and the arguments + to pass to the function. + + Returns: + A runnable sequence that will pass in the given functions to the model when run. + + Example: + .. code-block:: python + + from typing import Optional + + from langchain.chains.structured_output import create_openai_fn_runnable + from langchain_openai import ChatOpenAI + from langchain_core.prompts import ChatPromptTemplate + from langchain_core.pydantic_v1 import BaseModel, Field + + + class RecordPerson(BaseModel): + '''Record some identifying information about a person.''' + + name: str = Field(..., description="The person's name") + age: int = Field(..., description="The person's age") + fav_food: Optional[str] = Field(None, description="The person's favorite food") + + + class RecordDog(BaseModel): + '''Record some identifying information about a dog.''' + + name: str = Field(..., description="The dog's name") + color: str = Field(..., description="The dog's color") + fav_food: Optional[str] = Field(None, description="The dog's favorite food") + + + llm = ChatOpenAI(model="gpt-4", temperature=0) + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a world class algorithm for recording entities."), + ("human", "Make calls to the relevant function to record the entities in the following input: {input}"), + ("human", "Tip: Make sure to answer in the correct format"), + ] + ) + chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt) + 
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"}) + # -> RecordDog(name="Harry", color="brown", fav_food="chicken") + """ # noqa: E501 + if not functions: + raise ValueError("Need to pass in at least one function. Received zero.") + openai_functions = [convert_to_openai_function(f) for f in functions] + llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs} + if len(openai_functions) == 1 and enforce_single_function_usage: + llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]} + output_parser = output_parser or get_openai_output_parser(functions) + return prompt | llm.bind(**llm_kwargs) | output_parser + + +# TODO: implement mode='openai-tools'. +def create_structured_output_runnable( + output_schema: Union[Dict[str, Any], Type[BaseModel]], + llm: Runnable, + prompt: BasePromptTemplate, + *, + output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, + mode: Literal["openai-functions", "openai-json"] = "openai-functions", + enforce_single_function_usage: bool = True, + **kwargs: Any, +) -> Runnable: + """Create a runnable for extracting structured outputs. + + Args: + output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary + is passed in, it's assumed to already be a valid JsonSchema. + For best results, pydantic.BaseModels should have docstrings describing what + the schema represents and descriptions for the parameters. + llm: Language model to use. Assumed to support the OpenAI function-calling API + if mode is 'openai-functions'. Assumed to support OpenAI response_format + parameter if mode is 'openai-json'. + prompt: BasePromptTemplate to pass to the model. If mode is 'openai-json' and + prompt has input variable 'output_schema' then the given output_schema + will be converted to a JsonSchema and inserted in the prompt. + output_parser: Output parser to use for parsing model outputs. By default + will be inferred from the function types.
If pydantic.BaseModel is passed + in, then the OutputParser will try to parse outputs using the pydantic + class. Otherwise model outputs will be parsed as JSON. + mode: How structured outputs are extracted from the model. If 'openai-functions' + then OpenAI function calling is used. If 'openai-json' then OpenAI model + with response_format set to JSON is used. + enforce_single_function_usage: Only used if mode is 'openai-functions'. Only + used if a single function is passed in. If + True, then the model will be forced to use the given function. If False, + then the model will be given the option to use the given function or not. + **kwargs: Additional named arguments. + + Returns: + A runnable sequence that will return a structured output matching the given + output_schema. + + OpenAI functions example: + .. code-block:: python + + from typing import Optional + + from langchain.chains.structured_output import create_structured_output_runnable + from langchain_openai import ChatOpenAI + from langchain_core.prompts import ChatPromptTemplate + from langchain_core.pydantic_v1 import BaseModel, Field + + class Dog(BaseModel): + '''Identifying information about a dog.''' + + name: str = Field(..., description="The dog's name") + color: str = Field(..., description="The dog's color") + fav_food: Optional[str] = Field(None, description="The dog's favorite food") + + llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a world class algorithm for extracting information in structured formats."), + ("human", "Use the given format to extract information from the following input: {input}"), + ("human", "Tip: Make sure to answer in the correct format"), + ] + ) + chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-functions") + chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"}) + # -> Dog(name="Harry", color="brown", fav_food="chicken") + + OpenAI json 
response format example: + .. code-block:: python + + from typing import Optional + + from langchain.chains.structured_output import create_structured_output_runnable + from langchain_openai import ChatOpenAI + from langchain_core.prompts import ChatPromptTemplate + from langchain_core.pydantic_v1 import BaseModel, Field + + class Dog(BaseModel): + '''Identifying information about a dog.''' + + name: str = Field(..., description="The dog's name") + color: str = Field(..., description="The dog's color") + fav_food: Optional[str] = Field(None, description="The dog's favorite food") + + llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) + system = '''You are a world class assistant for extracting information in structured JSON formats. \ + + Extract a valid JSON blob from the user input that matches the following JSON Schema: + + {output_schema}''' + prompt = ChatPromptTemplate.from_messages( + [ + ("system", system), + ("human", "{input}"), + ] + ) + chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-json") + chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"}) + """ # noqa: E501 + if mode == "openai-functions": + return _create_openai_functions_structured_output_runnable( + output_schema, + llm, + prompt, + output_parser=output_parser, + enforce_single_function_usage=enforce_single_function_usage, + **kwargs, + ) + elif mode == "openai-json": + return _create_openai_json_runnable( + output_schema, llm, prompt, output_parser=output_parser, **kwargs + ) + else: + raise ValueError( + f"Invalid mode {mode}. Expected one of 'openai-functions', " + f"'openai-json'." + ) + + +def get_openai_output_parser( + functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]], +) -> Union[BaseOutputParser, BaseGenerationOutputParser]: + """Get the appropriate function output parser given the user functions. 
+ + Args: + functions: Sequence where element is a dictionary, a pydantic.BaseModel class, + or a Python function. If a dictionary is passed in, it is assumed to + already be a valid OpenAI function. + + Returns: + A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise + a JsonOutputFunctionsParser. If there's only one function and it is + not a Pydantic class, then the output parser will automatically extract + only the function arguments and not the function name. + """ + function_names = [convert_to_openai_function(f)["name"] for f in functions] + if isinstance(functions[0], type) and issubclass(functions[0], BaseModel): + if len(functions) > 1: + pydantic_schema: Union[Dict, Type[BaseModel]] = { + name: fn for name, fn in zip(function_names, functions) + } + else: + pydantic_schema = functions[0] + output_parser: Union[ + BaseOutputParser, BaseGenerationOutputParser + ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema) + else: + output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1) + return output_parser + + +def _create_openai_json_runnable( + output_schema: Union[Dict[str, Any], Type[BaseModel]], + llm: Runnable, + prompt: BasePromptTemplate, + *, + output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, +) -> Runnable: + """Create a runnable that requests a JSON object response and parses it.""" + if isinstance(output_schema, type) and issubclass(output_schema, BaseModel): + output_parser = output_parser or PydanticOutputParser( + pydantic_object=output_schema, + ) + schema_as_dict = convert_to_openai_function(output_schema)["parameters"] + else: + output_parser = output_parser or JsonOutputParser() + schema_as_dict = output_schema + + if "output_schema" in prompt.input_variables: + prompt = prompt.partial(output_schema=json.dumps(schema_as_dict, indent=2)) + + llm = llm.bind(response_format={"type": "json_object"}) + return prompt | llm | output_parser + + +def _create_openai_functions_structured_output_runnable( + output_schema:
Union[Dict[str, Any], Type[BaseModel]], + llm: Runnable, + prompt: BasePromptTemplate, + *, + output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None, + **kwargs: Any, +) -> Runnable: + if isinstance(output_schema, dict): + function: Any = { + "name": "output_formatter", + "description": ( + "Output formatter. Should always be used to format your response to the" + " user." + ), + "parameters": output_schema, + } + else: + + class _OutputFormatter(BaseModel): + """Output formatter. Should always be used to format your response to the user.""" # noqa: E501 + + output: output_schema # type: ignore + + function = _OutputFormatter + output_parser = output_parser or PydanticAttrOutputFunctionsParser( + pydantic_schema=_OutputFormatter, attr_name="output" + ) + return create_openai_fn_runnable( + [function], + llm, + prompt, + output_parser=output_parser, + **kwargs, + ) diff --git a/libs/langchain/langchain/output_parsers/pydantic.py b/libs/langchain/langchain/output_parsers/pydantic.py index 00424018531..1248560c0ca 100644 --- a/libs/langchain/langchain/output_parsers/pydantic.py +++ b/libs/langchain/langchain/output_parsers/pydantic.py @@ -1,42 +1,32 @@ import json -import re -from typing import Type, TypeVar +from typing import Any, List, Type from langchain_core.exceptions import OutputParserException -from langchain_core.output_parsers import BaseOutputParser +from langchain_core.output_parsers import JsonOutputParser +from langchain_core.outputs import Generation from langchain_core.pydantic_v1 import BaseModel, ValidationError from langchain.output_parsers.format_instructions import PYDANTIC_FORMAT_INSTRUCTIONS -T = TypeVar("T", bound=BaseModel) - -class PydanticOutputParser(BaseOutputParser[T]): +class PydanticOutputParser(JsonOutputParser): """Parse an output using a pydantic model.""" - pydantic_object: Type[T] + pydantic_object: Type[BaseModel] """The pydantic model to parse. 
Attention: To avoid potential compatibility issues, it's recommended to use pydantic <2 or leverage the v1 namespace in pydantic >= 2. """ - def parse(self, text: str) -> T: + def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any: + json_object = super().parse_result(result)  # NOTE: `partial` is not forwarded try: - # Greedy search for 1st json candidate. - match = re.search( - r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL - ) - json_str = "" - if match: - json_str = match.group() - json_object = json.loads(json_str, strict=False) return self.pydantic_object.parse_obj(json_object) - - except (json.JSONDecodeError, ValidationError) as e: + except ValidationError as e: name = self.pydantic_object.__name__ - msg = f"Failed to parse {name} from completion {text}. Got: {e}" - raise OutputParserException(msg, llm_output=text) + msg = f"Failed to parse {name} from completion {json_object}. Got: {e}" + raise OutputParserException(msg, llm_output=json.dumps(json_object)) from e def get_format_instructions(self) -> str: schema = self.pydantic_object.schema() @@ -57,6 +47,6 @@ class PydanticOutputParser(BaseOutputParser[T]): return "pydantic" @property - def OutputType(self) -> Type[T]: + def OutputType(self) -> Type[BaseModel]: """Return the pydantic model.""" return self.pydantic_object diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py b/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py index 9230792f764..07505e7bef5 100644 --- a/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py +++ b/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py @@ -53,7 +53,7 @@ DEF_EXPECTED_RESULT = TestModel( def test_pydantic_output_parser() -> None: """Test PydanticOutputParser.""" - pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser( + pydantic_parser: PydanticOutputParser = PydanticOutputParser( pydantic_object=TestModel ) @@ -65,7 +65,7 @@ def test_pydantic_output_parser() -> None:
def test_pydantic_output_parser_fail() -> None: """Test PydanticOutputParser where completion result fails schema validation.""" - pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser( + pydantic_parser: PydanticOutputParser = PydanticOutputParser( pydantic_object=TestModel )