diff --git a/libs/core/langchain_core/output_parsers/json.py b/libs/core/langchain_core/output_parsers/json.py
index 6500f778d64..d16316596a7 100644
--- a/libs/core/langchain_core/output_parsers/json.py
+++ b/libs/core/langchain_core/output_parsers/json.py
@@ -35,7 +35,7 @@ def _custom_parser(multiline_string: str) -> str:
multiline_string = multiline_string.decode()
multiline_string = re.sub(
- r'("action_input"\:\s*")(.*)(")',
+ r'("action_input"\:\s*")(.*?)(")',
_replace_new_line,
multiline_string,
flags=re.DOTALL,
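Making the capture lazy matters when the model output contains more than one quoted value: with `re.DOTALL`, the greedy `(.*)` runs to the *last* double quote in the string, so a single substitution can swallow neighboring fields. A minimal sketch of the difference, using a simplified stand-in for `_replace_new_line` (the real helper escapes more than just newlines):

```python
import re

def _escape_newlines(match: re.Match) -> str:
    # Simplified stand-in for _replace_new_line: escape raw newlines in the value.
    return match.group(1) + match.group(2).replace("\n", "\\n") + match.group(3)

s = '{"action_input": "a\nb"} {"action_input": "c"}'

greedy = re.sub(r'("action_input"\:\s*")(.*)(")', _escape_newlines, s, flags=re.DOTALL)
lazy = re.sub(r'("action_input"\:\s*")(.*?)(")', _escape_newlines, s, flags=re.DOTALL)

print(greedy)  # capture runs to the last quote, mangling both objects into one value
print(lazy)    # each value is escaped independently, as intended
```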
@@ -138,7 +138,7 @@ def parse_json_markdown(
The parsed JSON object as a Python dictionary.
"""
# Try to find JSON string within triple backticks
- match = re.search(r"```(json)?(.*)(```)?", json_string, re.DOTALL)
+ match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
# If no match found, assume the entire string is a JSON string
if match is None:
@@ -148,7 +148,7 @@ def parse_json_markdown(
json_str = match.group(2)
# Strip whitespace and newlines from the start and end
- json_str = json_str.strip()
+ json_str = json_str.strip().strip("`")
# handle newlines and other special characters inside the returned value
json_str = _custom_parser(json_str)
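With the closing fence no longer consumed by the regex, the trailing backticks survive into `json_str` and are removed by the added `.strip("`")`. Since `str.strip` only touches the ends of the string, backticks *inside* a value (the nested-fence case the tests exercise) are preserved. A condensed sketch of the new extraction path:

```python
import json
import re

text = '```json\n{"foo": "```bar```"}\n```'

match = re.search(r"```(json)?(.*)", text, re.DOTALL)
json_str = match.group(2)               # '\n{"foo": "```bar```"}\n```'
json_str = json_str.strip().strip("`")  # strip() drops outer whitespace; strip("`")
                                        # drops the closing fence (a trailing newline
                                        # may remain, which json.loads tolerates)
print(json.loads(json_str))             # {'foo': '```bar```'} -- inner backticks kept
```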
@@ -211,7 +211,8 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
try:
return parse_json_markdown(text)
except JSONDecodeError as e:
- raise OutputParserException(f"Invalid json output: {text}") from e
+ msg = f"Invalid json output: {text}"
+ raise OutputParserException(msg, llm_output=text) from e
def parse(self, text: str) -> Any:
return self.parse_result([Generation(text=text)])
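Attaching the raw text as `llm_output` means downstream consumers, such as a retry or fixing parser, can inspect exactly what the model produced. A minimal sketch of catching the enriched exception:

```python
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import JsonOutputParser

parser = JsonOutputParser()
try:
    parser.parse("this is not JSON")
except OutputParserException as e:
    # The offending model text is preserved on the exception.
    print(e.llm_output)  # -> "this is not JSON"
```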
diff --git a/libs/core/tests/unit_tests/output_parsers/test_json.py b/libs/core/tests/unit_tests/output_parsers/test_json.py
index 5559768bb07..716ccab48dd 100644
--- a/libs/core/tests/unit_tests/output_parsers/test_json.py
+++ b/libs/core/tests/unit_tests/output_parsers/test_json.py
@@ -70,21 +70,7 @@ JSON_WITH_MARKDOWN_CODE_BLOCK = """```json
JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
{
"action": "Final Answer",
- "action_input": "```bar\n
\n\ttext\n
```"
-}
-```"""
-
-JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
-{
- "action": "Final Answer",
- "action_input": "{"foo": "bar", "bar": "foo"}"
-}
-```"""
-
-JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
-{
- "action": "Final Answer",
- "action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
+ "action_input": "```bar\n\n\ttext\n
```"
}
```"""
@@ -202,6 +188,8 @@ def test_parse_json_with_code_blocks() -> None:
parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK)
assert parsed == {"foo": "```bar```"}
+
+def test_parse_json_with_code_blocks_and_newlines() -> None:
parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES)
assert parsed == {
@@ -211,8 +199,6 @@ def test_parse_json_with_code_blocks() -> None:
TEST_CASES_ESCAPED_QUOTES = [
- JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
- JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
]
diff --git a/libs/langchain/langchain/chains/openai_functions/__init__.py b/libs/langchain/langchain/chains/openai_functions/__init__.py
index 8be27606d7a..b6268422451 100644
--- a/libs/langchain/langchain/chains/openai_functions/__init__.py
+++ b/libs/langchain/langchain/chains/openai_functions/__init__.py
@@ -1,10 +1,7 @@
from langchain.chains.openai_functions.base import (
convert_to_openai_function,
create_openai_fn_chain,
- create_openai_fn_runnable,
create_structured_output_chain,
- create_structured_output_runnable,
- get_openai_output_parser,
)
from langchain.chains.openai_functions.citation_fuzzy_match import (
create_citation_fuzzy_match_chain,
@@ -21,6 +18,11 @@ from langchain.chains.openai_functions.tagging import (
create_tagging_chain,
create_tagging_chain_pydantic,
)
+from langchain.chains.structured_output.base import (
+ create_openai_fn_runnable,
+ create_structured_output_runnable,
+ get_openai_output_parser,
+)
__all__ = [
"convert_to_openai_function",
@@ -33,7 +35,7 @@ __all__ = [
"create_qa_with_sources_chain",
"create_structured_output_chain",
"create_openai_fn_chain",
- "create_structured_output_runnable",
- "create_openai_fn_runnable",
- "get_openai_output_parser",
+ "create_structured_output_runnable", # backwards compatibility
+ "create_openai_fn_runnable", # backwards compatibility
+ "get_openai_output_parser", # backwards compatibility
]
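Because the runnable helpers are now re-exported from their new home, existing imports keep resolving to the same objects. A quick sanity-check sketch:

```python
from langchain.chains.openai_functions import create_structured_output_runnable as old
from langchain.chains.structured_output.base import (
    create_structured_output_runnable as new,
)

# Both paths point at the same function object; the old path is just a re-export.
assert old is new
```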
diff --git a/libs/langchain/langchain/chains/openai_functions/base.py b/libs/langchain/langchain/chains/openai_functions/base.py
index 7f8fdd4431b..2cef8ac74ce 100644
--- a/libs/langchain/langchain/chains/openai_functions/base.py
+++ b/libs/langchain/langchain/chains/openai_functions/base.py
@@ -12,229 +12,34 @@ from typing import (
from langchain_core._api import deprecated
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import (
- BaseGenerationOutputParser,
BaseLLMOutputParser,
- BaseOutputParser,
)
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import BaseModel
-from langchain_core.runnables import Runnable
from langchain_core.utils.function_calling import (
PYTHON_TO_JSON_TYPES,
convert_to_openai_function,
)
from langchain.chains import LLMChain
+from langchain.chains.structured_output.base import (
+ create_openai_fn_runnable,
+ create_structured_output_runnable,
+ get_openai_output_parser,
+)
from langchain.output_parsers.openai_functions import (
- JsonOutputFunctionsParser,
PydanticAttrOutputFunctionsParser,
- PydanticOutputFunctionsParser,
)
-
-def get_openai_output_parser(
- functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
-) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
- """Get the appropriate function output parser given the user functions.
-
- Args:
- functions: Sequence where element is a dictionary, a pydantic.BaseModel class,
- or a Python function. If a dictionary is passed in, it is assumed to
- already be a valid OpenAI function.
-
- Returns:
- A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise
- a JsonOutputFunctionsParser. If there's only one function and it is
- not a Pydantic class, then the output parser will automatically extract
- only the function arguments and not the function name.
- """
- function_names = [convert_to_openai_function(f)["name"] for f in functions]
- if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
- if len(functions) > 1:
- pydantic_schema: Union[Dict, Type[BaseModel]] = {
- name: fn for name, fn in zip(function_names, functions)
- }
- else:
- pydantic_schema = functions[0]
- output_parser: Union[
- BaseOutputParser, BaseGenerationOutputParser
- ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
- else:
- output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
- return output_parser
-
-
-def create_openai_fn_runnable(
- functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
- llm: Runnable,
- prompt: BasePromptTemplate,
- *,
- enforce_single_function_usage: bool = True,
- output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
- **kwargs: Any,
-) -> Runnable:
- """Create a runnable sequence that uses OpenAI functions.
-
- Args:
- functions: A sequence of either dictionaries, pydantic.BaseModels classes, or
- Python functions. If dictionaries are passed in, they are assumed to
- already be a valid OpenAI functions. If only a single
- function is passed in, then it will be enforced that the model use that
- function. pydantic.BaseModels and Python functions should have docstrings
- describing what the function does. For best results, pydantic.BaseModels
- should have descriptions of the parameters and Python functions should have
- Google Python style args descriptions in the docstring. Additionally,
- Python functions should only use primitive types (str, int, float, bool) or
- pydantic.BaseModels for arguments.
- llm: Language model to use, assumed to support the OpenAI function-calling API.
- prompt: BasePromptTemplate to pass to the model.
- enforce_single_function_usage: only used if a single function is passed in. If
- True, then the model will be forced to use the given function. If False,
- then the model will be given the option to use the given function or not.
- output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
- will be inferred from the function types. If pydantic.BaseModels are passed
- in, then the OutputParser will try to parse outputs using those. Otherwise
- model outputs will simply be parsed as JSON. If multiple functions are
- passed in and they are not pydantic.BaseModels, the chain output will
- include both the name of the function that was returned and the arguments
- to pass to the function.
-
- Returns:
- A runnable sequence that will pass in the given functions to the model when run.
-
- Example:
- .. code-block:: python
-
- from typing import Optional
-
- from langchain.chains.openai_functions import create_openai_fn_runnable
- from langchain_community.chat_models import ChatOpenAI
- from langchain_core.prompts import ChatPromptTemplate
- from langchain_core.pydantic_v1 import BaseModel, Field
-
-
- class RecordPerson(BaseModel):
- \"\"\"Record some identifying information about a person.\"\"\"
-
- name: str = Field(..., description="The person's name")
- age: int = Field(..., description="The person's age")
- fav_food: Optional[str] = Field(None, description="The person's favorite food")
-
-
- class RecordDog(BaseModel):
- \"\"\"Record some identifying information about a dog.\"\"\"
-
- name: str = Field(..., description="The dog's name")
- color: str = Field(..., description="The dog's color")
- fav_food: Optional[str] = Field(None, description="The dog's favorite food")
-
-
- llm = ChatOpenAI(model="gpt-4", temperature=0)
- prompt = ChatPromptTemplate.from_messages(
- [
- ("system", "You are a world class algorithm for recording entities."),
- ("human", "Make calls to the relevant function to record the entities in the following input: {input}"),
- ("human", "Tip: Make sure to answer in the correct format"),
- ]
- )
- chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt)
- chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
- # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
- """ # noqa: E501
- if not functions:
- raise ValueError("Need to pass in at least one function. Received zero.")
- openai_functions = [convert_to_openai_function(f) for f in functions]
- llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs}
- if len(openai_functions) == 1 and enforce_single_function_usage:
- llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
- output_parser = output_parser or get_openai_output_parser(functions)
- return prompt | llm.bind(**llm_kwargs) | output_parser
-
-
-def create_structured_output_runnable(
- output_schema: Union[Dict[str, Any], Type[BaseModel]],
- llm: Runnable,
- prompt: BasePromptTemplate,
- *,
- output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
- **kwargs: Any,
-) -> Runnable:
- """Create a runnable that uses an OpenAI function to get a structured output.
-
- Args:
- output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
- is passed in, it's assumed to already be a valid JsonSchema.
- For best results, pydantic.BaseModels should have docstrings describing what
- the schema represents and descriptions for the parameters.
- llm: Language model to use, assumed to support the OpenAI function-calling API.
- prompt: BasePromptTemplate to pass to the model.
- output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
- will be inferred from the function types. If pydantic.BaseModels are passed
- in, then the OutputParser will try to parse outputs using those. Otherwise
- model outputs will simply be parsed as JSON.
-
- Returns:
- A runnable sequence that will pass the given function to the model when run.
-
- Example:
- .. code-block:: python
-
- from typing import Optional
-
- from langchain.chains.openai_functions import create_structured_output_runnable
- from langchain_community.chat_models import ChatOpenAI
- from langchain_core.prompts import ChatPromptTemplate
- from langchain_core.pydantic_v1 import BaseModel, Field
-
- class Dog(BaseModel):
- \"\"\"Identifying information about a dog.\"\"\"
-
- name: str = Field(..., description="The dog's name")
- color: str = Field(..., description="The dog's color")
- fav_food: Optional[str] = Field(None, description="The dog's favorite food")
-
- llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0)
- prompt = ChatPromptTemplate.from_messages(
- [
- ("system", "You are a world class algorithm for extracting information in structured formats."),
- ("human", "Use the given format to extract information from the following input: {input}"),
- ("human", "Tip: Make sure to answer in the correct format"),
- ]
- )
- chain = create_structured_output_runnable(Dog, llm, prompt)
- chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
- # -> Dog(name="Harry", color="brown", fav_food="chicken")
- """ # noqa: E501
- if isinstance(output_schema, dict):
- function: Any = {
- "name": "output_formatter",
- "description": (
- "Output formatter. Should always be used to format your response to the"
- " user."
- ),
- "parameters": output_schema,
- }
- else:
-
- class _OutputFormatter(BaseModel):
- """Output formatter. Should always be used to format your response to the user.""" # noqa: E501
-
- output: output_schema # type: ignore
-
- function = _OutputFormatter
- output_parser = output_parser or PydanticAttrOutputFunctionsParser(
- pydantic_schema=_OutputFormatter, attr_name="output"
- )
- return create_openai_fn_runnable(
- [function],
- llm,
- prompt,
- output_parser=output_parser,
- **kwargs,
- )
-
-
-""" --- Legacy --- """
+__all__ = [
+ "get_openai_output_parser",
+ "create_openai_fn_runnable",
+ "create_structured_output_runnable",
+ "create_openai_fn_chain", # deprecated
+ "create_structured_output_chain", # deprecated
+ "PYTHON_TO_JSON_TYPES", # backwards compatibility
+ "convert_to_openai_function", # backwards compatibility
+]
@deprecated(since="0.1.1", removal="0.2.0", alternative="create_openai_fn_runnable")
@@ -426,14 +231,3 @@ def create_structured_output_chain(
output_parser=output_parser,
**kwargs,
)
-
-
-__all__ = [
- "create_openai_fn_chain",
- "create_openai_fn_runnable",
- "create_structured_output_chain",
- "create_structured_output_runnable",
- "get_openai_output_parser",
- "PYTHON_TO_JSON_TYPES",
- "convert_to_openai_function",
-]
diff --git a/libs/langchain/langchain/chains/structured_output/__init__.py b/libs/langchain/langchain/chains/structured_output/__init__.py
new file mode 100644
index 00000000000..d387a7cbe4e
--- /dev/null
+++ b/libs/langchain/langchain/chains/structured_output/__init__.py
@@ -0,0 +1,6 @@
+from langchain.chains.structured_output.base import (
+ create_openai_fn_runnable,
+ create_structured_output_runnable,
+)
+
+__all__ = ["create_structured_output_runnable", "create_openai_fn_runnable"]
diff --git a/libs/langchain/langchain/chains/structured_output/base.py b/libs/langchain/langchain/chains/structured_output/base.py
new file mode 100644
index 00000000000..d825c1fe7c8
--- /dev/null
+++ b/libs/langchain/langchain/chains/structured_output/base.py
@@ -0,0 +1,321 @@
+import json
+from typing import Any, Callable, Dict, Literal, Optional, Sequence, Type, Union
+
+from langchain_core.output_parsers import (
+ BaseGenerationOutputParser,
+ BaseOutputParser,
+ JsonOutputParser,
+)
+from langchain_core.prompts import BasePromptTemplate
+from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.runnables import Runnable
+from langchain_core.utils.function_calling import convert_to_openai_function
+
+from langchain.output_parsers import PydanticOutputParser
+from langchain.output_parsers.openai_functions import (
+ JsonOutputFunctionsParser,
+ PydanticAttrOutputFunctionsParser,
+ PydanticOutputFunctionsParser,
+)
+
+
+def create_openai_fn_runnable(
+ functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
+ llm: Runnable,
+ prompt: BasePromptTemplate,
+ *,
+ enforce_single_function_usage: bool = True,
+ output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+ **kwargs: Any,
+) -> Runnable:
+ """Create a runnable sequence that uses OpenAI functions.
+
+ Args:
+        functions: A sequence of dictionaries, pydantic.BaseModel classes, or
+            Python functions. If dictionaries are passed in, they are assumed to
+            already be valid OpenAI functions. If only a single function is passed
+            in, then the model will be forced to use that function.
+            pydantic.BaseModel classes and Python functions should have docstrings
+            describing what the function does. For best results, pydantic.BaseModel
+            classes should have descriptions of the parameters, and Python functions
+            should have Google-style args descriptions in the docstring. Additionally,
+            Python functions should only use primitive types (str, int, float, bool)
+            or pydantic.BaseModel classes for arguments.
+ llm: Language model to use, assumed to support the OpenAI function-calling API.
+ prompt: BasePromptTemplate to pass to the model.
+        enforce_single_function_usage: Only used if a single function is passed in.
+            If True, then the model will be forced to use the given function. If
+            False, then the model will be given the option to use the given function
+            or not.
+        output_parser: Output parser to use for parsing model outputs. By default,
+            it will be inferred from the function types. If pydantic.BaseModel
+            classes are passed in, then the output parser will try to parse outputs
+            using those. Otherwise model outputs will simply be parsed as JSON. If
+            multiple functions are passed in and they are not pydantic.BaseModel
+            classes, the chain output will include both the name of the function that
+            was returned and the arguments to pass to the function.
+
+ Returns:
+ A runnable sequence that will pass in the given functions to the model when run.
+
+ Example:
+ .. code-block:: python
+
+ from typing import Optional
+
+ from langchain.chains.structured_output import create_openai_fn_runnable
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.pydantic_v1 import BaseModel, Field
+
+
+ class RecordPerson(BaseModel):
+ '''Record some identifying information about a person.'''
+
+ name: str = Field(..., description="The person's name")
+ age: int = Field(..., description="The person's age")
+ fav_food: Optional[str] = Field(None, description="The person's favorite food")
+
+
+ class RecordDog(BaseModel):
+ '''Record some identifying information about a dog.'''
+
+ name: str = Field(..., description="The dog's name")
+ color: str = Field(..., description="The dog's color")
+ fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+
+ llm = ChatOpenAI(model="gpt-4", temperature=0)
+ prompt = ChatPromptTemplate.from_messages(
+ [
+ ("system", "You are a world class algorithm for recording entities."),
+ ("human", "Make calls to the relevant function to record the entities in the following input: {input}"),
+ ("human", "Tip: Make sure to answer in the correct format"),
+ ]
+ )
+ chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt)
+ chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+ # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
+ """ # noqa: E501
+ if not functions:
+ raise ValueError("Need to pass in at least one function. Received zero.")
+ openai_functions = [convert_to_openai_function(f) for f in functions]
+ llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs}
+ if len(openai_functions) == 1 and enforce_single_function_usage:
+ llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
+ output_parser = output_parser or get_openai_output_parser(functions)
+ return prompt | llm.bind(**llm_kwargs) | output_parser
+
+
+# TODO: implement mode='openai-tools'.
+def create_structured_output_runnable(
+ output_schema: Union[Dict[str, Any], Type[BaseModel]],
+ llm: Runnable,
+ prompt: BasePromptTemplate,
+ *,
+ output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+ mode: Literal["openai-functions", "openai-json"] = "openai-functions",
+ enforce_single_function_usage: bool = True,
+ **kwargs: Any,
+) -> Runnable:
+ """Create a runnable for extracting structured outputs.
+
+ Args:
+ output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
+ is passed in, it's assumed to already be a valid JsonSchema.
+ For best results, pydantic.BaseModels should have docstrings describing what
+ the schema represents and descriptions for the parameters.
+        llm: Language model to use. Assumed to support the OpenAI function-calling
+            API if mode is 'openai-functions'. Assumed to support the OpenAI
+            response_format parameter if mode is 'openai-json'.
+        prompt: BasePromptTemplate to pass to the model. If mode is 'openai-json' and
+            the prompt has an input variable 'output_schema', then the given
+            output_schema will be converted to a JsonSchema and inserted in the
+            prompt.
+        output_parser: Output parser to use for parsing model outputs. By default,
+            it will be inferred from the output_schema type. If a pydantic.BaseModel
+            class is passed in, then the output parser will try to parse outputs
+            using the pydantic class. Otherwise model outputs will be parsed as JSON.
+        mode: How structured outputs are extracted from the model. If
+            'openai-functions', then OpenAI function calling is used. If
+            'openai-json', then an OpenAI model with response_format set to JSON is
+            used.
+        enforce_single_function_usage: Only used if mode is 'openai-functions' and
+            only if a single function is passed in. If True, then the model will be
+            forced to use the given function. If False, then the model will be given
+            the option to use the given function or not.
+ **kwargs: Additional named arguments.
+
+ Returns:
+ A runnable sequence that will return a structured output matching the given
+ output_schema.
+
+ OpenAI functions example:
+ .. code-block:: python
+
+ from typing import Optional
+
+ from langchain.chains.structured_output import create_structured_output_runnable
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.pydantic_v1 import BaseModel, Field
+
+ class Dog(BaseModel):
+ '''Identifying information about a dog.'''
+
+ name: str = Field(..., description="The dog's name")
+ color: str = Field(..., description="The dog's color")
+ fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+ llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
+ prompt = ChatPromptTemplate.from_messages(
+ [
+ ("system", "You are a world class algorithm for extracting information in structured formats."),
+ ("human", "Use the given format to extract information from the following input: {input}"),
+ ("human", "Tip: Make sure to answer in the correct format"),
+ ]
+ )
+ chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-functions")
+ chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+ # -> Dog(name="Harry", color="brown", fav_food="chicken")
+
+ OpenAI json response format example:
+ .. code-block:: python
+
+ from typing import Optional
+
+ from langchain.chains.structured_output import create_structured_output_runnable
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.pydantic_v1 import BaseModel, Field
+
+ class Dog(BaseModel):
+ '''Identifying information about a dog.'''
+
+ name: str = Field(..., description="The dog's name")
+ color: str = Field(..., description="The dog's color")
+ fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+ llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
+ system = '''You are a world class assistant for extracting information in structured JSON formats. \
+
+ Extract a valid JSON blob from the user input that matches the following JSON Schema:
+
+ {output_schema}'''
+ prompt = ChatPromptTemplate.from_messages(
+ [
+ ("system", system),
+ ("human", "{input}"),
+ ]
+ )
+ chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-json")
+ chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+ """ # noqa: E501
+ if mode == "openai-functions":
+ return _create_openai_functions_structured_output_runnable(
+ output_schema,
+ llm,
+ prompt,
+ output_parser=output_parser,
+ enforce_single_function_usage=enforce_single_function_usage,
+ **kwargs,
+ )
+ elif mode == "openai-json":
+ return _create_openai_json_runnable(
+ output_schema, llm, prompt, output_parser=output_parser, **kwargs
+ )
+ else:
+ raise ValueError(
+ f"Invalid mode {mode}. Expected one of 'openai-functions', "
+ f"'openai-json'."
+ )
+
+
+def get_openai_output_parser(
+ functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
+) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
+ """Get the appropriate function output parser given the user functions.
+
+ Args:
+ functions: Sequence where element is a dictionary, a pydantic.BaseModel class,
+ or a Python function. If a dictionary is passed in, it is assumed to
+ already be a valid OpenAI function.
+
+ Returns:
+ A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise
+ a JsonOutputFunctionsParser. If there's only one function and it is
+ not a Pydantic class, then the output parser will automatically extract
+ only the function arguments and not the function name.
+ """
+ function_names = [convert_to_openai_function(f)["name"] for f in functions]
+ if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
+ if len(functions) > 1:
+ pydantic_schema: Union[Dict, Type[BaseModel]] = {
+ name: fn for name, fn in zip(function_names, functions)
+ }
+ else:
+ pydantic_schema = functions[0]
+ output_parser: Union[
+ BaseOutputParser, BaseGenerationOutputParser
+ ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
+ else:
+ output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
+ return output_parser
+
+
+def _create_openai_json_runnable(
+ output_schema: Union[Dict[str, Any], Type[BaseModel]],
+ llm: Runnable,
+ prompt: BasePromptTemplate,
+ *,
+ output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+) -> Runnable:
+ """"""
+ if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
+ output_parser = output_parser or PydanticOutputParser(
+ pydantic_object=output_schema,
+ )
+ schema_as_dict = convert_to_openai_function(output_schema)["parameters"]
+ else:
+ output_parser = output_parser or JsonOutputParser()
+ schema_as_dict = output_schema
+
+ if "output_schema" in prompt.input_variables:
+ prompt = prompt.partial(output_schema=json.dumps(schema_as_dict, indent=2))
+
+ llm = llm.bind(response_format={"type": "json_object"})
+ return prompt | llm | output_parser
+
+
+def _create_openai_functions_structured_output_runnable(
+ output_schema: Union[Dict[str, Any], Type[BaseModel]],
+ llm: Runnable,
+ prompt: BasePromptTemplate,
+ *,
+ output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+ **kwargs: Any,
+) -> Runnable:
+ if isinstance(output_schema, dict):
+ function: Any = {
+ "name": "output_formatter",
+ "description": (
+ "Output formatter. Should always be used to format your response to the"
+ " user."
+ ),
+ "parameters": output_schema,
+ }
+ else:
+
+ class _OutputFormatter(BaseModel):
+ """Output formatter. Should always be used to format your response to the user.""" # noqa: E501
+
+ output: output_schema # type: ignore
+
+ function = _OutputFormatter
+ output_parser = output_parser or PydanticAttrOutputFunctionsParser(
+ pydantic_schema=_OutputFormatter, attr_name="output"
+ )
+ return create_openai_fn_runnable(
+ [function],
+ llm,
+ prompt,
+ output_parser=output_parser,
+ **kwargs,
+ )
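The docstring examples both use a pydantic class, but `_create_openai_json_runnable` also accepts a plain JsonSchema dict, in which case a `JsonOutputParser` is used and the chain returns a dict. A hedged sketch of that path (the schema and prompt below are illustrative, not from the source):

```python
from langchain.chains.structured_output import create_structured_output_runnable
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Illustrative JsonSchema dict (not from the source).
dog_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "description": "The dog's name"},
        "color": {"type": "string", "description": "The dog's color"},
    },
    "required": ["name", "color"],
}

# OpenAI's JSON mode expects the word "JSON" to appear in the messages; the
# {output_schema} variable is filled in automatically by the helper.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Extract a JSON object matching this JSON Schema:\n\n{output_schema}"),
        ("human", "{input}"),
    ]
)
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
chain = create_structured_output_runnable(dog_schema, llm, prompt, mode="openai-json")
# chain.invoke({"input": "Harry was a chubby brown beagle"})
# -> {"name": "Harry", "color": "brown"}  (a plain dict, via JsonOutputParser)
```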
diff --git a/libs/langchain/langchain/output_parsers/pydantic.py b/libs/langchain/langchain/output_parsers/pydantic.py
index 00424018531..1248560c0ca 100644
--- a/libs/langchain/langchain/output_parsers/pydantic.py
+++ b/libs/langchain/langchain/output_parsers/pydantic.py
@@ -1,42 +1,32 @@
import json
-import re
-from typing import Type, TypeVar
+from typing import Any, List, Type
from langchain_core.exceptions import OutputParserException
-from langchain_core.output_parsers import BaseOutputParser
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.outputs import Generation
from langchain_core.pydantic_v1 import BaseModel, ValidationError
from langchain.output_parsers.format_instructions import PYDANTIC_FORMAT_INSTRUCTIONS
-T = TypeVar("T", bound=BaseModel)
-
-class PydanticOutputParser(BaseOutputParser[T]):
+class PydanticOutputParser(JsonOutputParser):
"""Parse an output using a pydantic model."""
- pydantic_object: Type[T]
+ pydantic_object: Type[BaseModel]
"""The pydantic model to parse.
Attention: To avoid potential compatibility issues, it's recommended to use
pydantic <2 or leverage the v1 namespace in pydantic >= 2.
"""
- def parse(self, text: str) -> T:
+ def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
+ json_object = super().parse_result(result)
try:
- # Greedy search for 1st json candidate.
- match = re.search(
- r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
- )
- json_str = ""
- if match:
- json_str = match.group()
- json_object = json.loads(json_str, strict=False)
return self.pydantic_object.parse_obj(json_object)
-
- except (json.JSONDecodeError, ValidationError) as e:
+ except ValidationError as e:
name = self.pydantic_object.__name__
- msg = f"Failed to parse {name} from completion {text}. Got: {e}"
- raise OutputParserException(msg, llm_output=text)
+ msg = f"Failed to parse {name} from completion {json_object}. Got: {e}"
+            raise OutputParserException(msg, llm_output=json_object) from e
def get_format_instructions(self) -> str:
schema = self.pydantic_object.schema()
@@ -57,6 +47,6 @@ class PydanticOutputParser(BaseOutputParser[T]):
return "pydantic"
@property
- def OutputType(self) -> Type[T]:
+ def OutputType(self) -> Type[BaseModel]:
"""Return the pydantic model."""
return self.pydantic_object
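Delegating to `JsonOutputParser.parse_result` means `PydanticOutputParser` now inherits the markdown-fence handling and partial-JSON tolerance shown earlier, rather than relying on its old greedy `\{.*\}` search. A minimal sketch:

```python
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel


class Joke(BaseModel):
    setup: str
    punchline: str


parser = PydanticOutputParser(pydantic_object=Joke)

# Fenced model output is handled by the inherited JSON-markdown parsing.
text = '```json\n{"setup": "Why?", "punchline": "Because."}\n```'
print(parser.parse(text))  # -> setup='Why?' punchline='Because.'
```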
diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py b/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
index 9230792f764..07505e7bef5 100644
--- a/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
+++ b/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
@@ -53,7 +53,7 @@ DEF_EXPECTED_RESULT = TestModel(
def test_pydantic_output_parser() -> None:
"""Test PydanticOutputParser."""
- pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser(
+ pydantic_parser: PydanticOutputParser = PydanticOutputParser(
pydantic_object=TestModel
)
@@ -65,7 +65,7 @@ def test_pydantic_output_parser() -> None:
def test_pydantic_output_parser_fail() -> None:
"""Test PydanticOutputParser where completion result fails schema validation."""
- pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser(
+ pydantic_parser: PydanticOutputParser = PydanticOutputParser(
pydantic_object=TestModel
)