mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-02 01:23:07 +00:00
langchain[minor], core[minor]: update json, pydantic parser. add openai-json structured output runnable (#16914)
This commit is contained in:
parent
e22c4d4eb0
commit
852973d616
@ -35,7 +35,7 @@ def _custom_parser(multiline_string: str) -> str:
|
||||
multiline_string = multiline_string.decode()
|
||||
|
||||
multiline_string = re.sub(
|
||||
r'("action_input"\:\s*")(.*)(")',
|
||||
r'("action_input"\:\s*")(.*?)(")',
|
||||
_replace_new_line,
|
||||
multiline_string,
|
||||
flags=re.DOTALL,
|
||||
@ -138,7 +138,7 @@ def parse_json_markdown(
|
||||
The parsed JSON object as a Python dictionary.
|
||||
"""
|
||||
# Try to find JSON string within triple backticks
|
||||
match = re.search(r"```(json)?(.*)(```)?", json_string, re.DOTALL)
|
||||
match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
|
||||
|
||||
# If no match found, assume the entire string is a JSON string
|
||||
if match is None:
|
||||
@ -148,7 +148,7 @@ def parse_json_markdown(
|
||||
json_str = match.group(2)
|
||||
|
||||
# Strip whitespace and newlines from the start and end
|
||||
json_str = json_str.strip()
|
||||
json_str = json_str.strip().strip("`")
|
||||
|
||||
# handle newlines and other special characters inside the returned value
|
||||
json_str = _custom_parser(json_str)
|
||||
@ -211,7 +211,8 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
|
||||
try:
|
||||
return parse_json_markdown(text)
|
||||
except JSONDecodeError as e:
|
||||
raise OutputParserException(f"Invalid json output: {text}") from e
|
||||
msg = f"Invalid json output: {text}"
|
||||
raise OutputParserException(msg, llm_output=text) from e
|
||||
|
||||
def parse(self, text: str) -> Any:
|
||||
return self.parse_result([Generation(text=text)])
|
||||
|
@ -70,21 +70,7 @@ JSON_WITH_MARKDOWN_CODE_BLOCK = """```json
|
||||
JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "```bar\n<div id="1" class=\"value\">\n\ttext\n</div>```"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{"foo": "bar", "bar": "foo"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
|
||||
"action_input": "```bar\n<div id=\\"1\\" class=\\"value\\">\n\ttext\n</div>```"
|
||||
}
|
||||
```"""
|
||||
|
||||
@ -202,6 +188,8 @@ def test_parse_json_with_code_blocks() -> None:
|
||||
parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK)
|
||||
assert parsed == {"foo": "```bar```"}
|
||||
|
||||
|
||||
def test_parse_json_with_code_blocks_and_newlines() -> None:
|
||||
parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES)
|
||||
|
||||
assert parsed == {
|
||||
@ -211,8 +199,6 @@ def test_parse_json_with_code_blocks() -> None:
|
||||
|
||||
|
||||
TEST_CASES_ESCAPED_QUOTES = [
|
||||
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
|
||||
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
|
||||
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
|
||||
]
|
||||
|
||||
|
@ -1,10 +1,7 @@
|
||||
from langchain.chains.openai_functions.base import (
|
||||
convert_to_openai_function,
|
||||
create_openai_fn_chain,
|
||||
create_openai_fn_runnable,
|
||||
create_structured_output_chain,
|
||||
create_structured_output_runnable,
|
||||
get_openai_output_parser,
|
||||
)
|
||||
from langchain.chains.openai_functions.citation_fuzzy_match import (
|
||||
create_citation_fuzzy_match_chain,
|
||||
@ -21,6 +18,11 @@ from langchain.chains.openai_functions.tagging import (
|
||||
create_tagging_chain,
|
||||
create_tagging_chain_pydantic,
|
||||
)
|
||||
from langchain.chains.structured_output.base import (
|
||||
create_openai_fn_runnable,
|
||||
create_structured_output_runnable,
|
||||
get_openai_output_parser,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"convert_to_openai_function",
|
||||
@ -33,7 +35,7 @@ __all__ = [
|
||||
"create_qa_with_sources_chain",
|
||||
"create_structured_output_chain",
|
||||
"create_openai_fn_chain",
|
||||
"create_structured_output_runnable",
|
||||
"create_openai_fn_runnable",
|
||||
"get_openai_output_parser",
|
||||
"create_structured_output_runnable", # backwards compatibility
|
||||
"create_openai_fn_runnable", # backwards compatibility
|
||||
"get_openai_output_parser", # backwards compatibility
|
||||
]
|
||||
|
@ -12,229 +12,34 @@ from typing import (
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.language_models import BaseLanguageModel
|
||||
from langchain_core.output_parsers import (
|
||||
BaseGenerationOutputParser,
|
||||
BaseLLMOutputParser,
|
||||
BaseOutputParser,
|
||||
)
|
||||
from langchain_core.prompts import BasePromptTemplate
|
||||
from langchain_core.pydantic_v1 import BaseModel
|
||||
from langchain_core.runnables import Runnable
|
||||
from langchain_core.utils.function_calling import (
|
||||
PYTHON_TO_JSON_TYPES,
|
||||
convert_to_openai_function,
|
||||
)
|
||||
|
||||
from langchain.chains import LLMChain
|
||||
from langchain.chains.structured_output.base import (
|
||||
create_openai_fn_runnable,
|
||||
create_structured_output_runnable,
|
||||
get_openai_output_parser,
|
||||
)
|
||||
from langchain.output_parsers.openai_functions import (
|
||||
JsonOutputFunctionsParser,
|
||||
PydanticAttrOutputFunctionsParser,
|
||||
PydanticOutputFunctionsParser,
|
||||
)
|
||||
|
||||
|
||||
def get_openai_output_parser(
|
||||
functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
|
||||
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
|
||||
"""Get the appropriate function output parser given the user functions.
|
||||
|
||||
Args:
|
||||
functions: Sequence where element is a dictionary, a pydantic.BaseModel class,
|
||||
or a Python function. If a dictionary is passed in, it is assumed to
|
||||
already be a valid OpenAI function.
|
||||
|
||||
Returns:
|
||||
A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise
|
||||
a JsonOutputFunctionsParser. If there's only one function and it is
|
||||
not a Pydantic class, then the output parser will automatically extract
|
||||
only the function arguments and not the function name.
|
||||
"""
|
||||
function_names = [convert_to_openai_function(f)["name"] for f in functions]
|
||||
if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
|
||||
if len(functions) > 1:
|
||||
pydantic_schema: Union[Dict, Type[BaseModel]] = {
|
||||
name: fn for name, fn in zip(function_names, functions)
|
||||
}
|
||||
else:
|
||||
pydantic_schema = functions[0]
|
||||
output_parser: Union[
|
||||
BaseOutputParser, BaseGenerationOutputParser
|
||||
] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
|
||||
else:
|
||||
output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
|
||||
return output_parser
|
||||
|
||||
|
||||
def create_openai_fn_runnable(
|
||||
functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
|
||||
llm: Runnable,
|
||||
prompt: BasePromptTemplate,
|
||||
*,
|
||||
enforce_single_function_usage: bool = True,
|
||||
output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Runnable:
|
||||
"""Create a runnable sequence that uses OpenAI functions.
|
||||
|
||||
Args:
|
||||
functions: A sequence of either dictionaries, pydantic.BaseModels classes, or
|
||||
Python functions. If dictionaries are passed in, they are assumed to
|
||||
already be a valid OpenAI functions. If only a single
|
||||
function is passed in, then it will be enforced that the model use that
|
||||
function. pydantic.BaseModels and Python functions should have docstrings
|
||||
describing what the function does. For best results, pydantic.BaseModels
|
||||
should have descriptions of the parameters and Python functions should have
|
||||
Google Python style args descriptions in the docstring. Additionally,
|
||||
Python functions should only use primitive types (str, int, float, bool) or
|
||||
pydantic.BaseModels for arguments.
|
||||
llm: Language model to use, assumed to support the OpenAI function-calling API.
|
||||
prompt: BasePromptTemplate to pass to the model.
|
||||
enforce_single_function_usage: only used if a single function is passed in. If
|
||||
True, then the model will be forced to use the given function. If False,
|
||||
then the model will be given the option to use the given function or not.
|
||||
output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
|
||||
will be inferred from the function types. If pydantic.BaseModels are passed
|
||||
in, then the OutputParser will try to parse outputs using those. Otherwise
|
||||
model outputs will simply be parsed as JSON. If multiple functions are
|
||||
passed in and they are not pydantic.BaseModels, the chain output will
|
||||
include both the name of the function that was returned and the arguments
|
||||
to pass to the function.
|
||||
|
||||
Returns:
|
||||
A runnable sequence that will pass in the given functions to the model when run.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langchain.chains.openai_functions import create_openai_fn_runnable
|
||||
from langchain_community.chat_models import ChatOpenAI
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
|
||||
|
||||
class RecordPerson(BaseModel):
|
||||
\"\"\"Record some identifying information about a person.\"\"\"
|
||||
|
||||
name: str = Field(..., description="The person's name")
|
||||
age: int = Field(..., description="The person's age")
|
||||
fav_food: Optional[str] = Field(None, description="The person's favorite food")
|
||||
|
||||
|
||||
class RecordDog(BaseModel):
|
||||
\"\"\"Record some identifying information about a dog.\"\"\"
|
||||
|
||||
name: str = Field(..., description="The dog's name")
|
||||
color: str = Field(..., description="The dog's color")
|
||||
fav_food: Optional[str] = Field(None, description="The dog's favorite food")
|
||||
|
||||
|
||||
llm = ChatOpenAI(model="gpt-4", temperature=0)
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
("system", "You are a world class algorithm for recording entities."),
|
||||
("human", "Make calls to the relevant function to record the entities in the following input: {input}"),
|
||||
("human", "Tip: Make sure to answer in the correct format"),
|
||||
]
|
||||
)
|
||||
chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt)
|
||||
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
|
||||
# -> RecordDog(name="Harry", color="brown", fav_food="chicken")
|
||||
""" # noqa: E501
|
||||
if not functions:
|
||||
raise ValueError("Need to pass in at least one function. Received zero.")
|
||||
openai_functions = [convert_to_openai_function(f) for f in functions]
|
||||
llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs}
|
||||
if len(openai_functions) == 1 and enforce_single_function_usage:
|
||||
llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
|
||||
output_parser = output_parser or get_openai_output_parser(functions)
|
||||
return prompt | llm.bind(**llm_kwargs) | output_parser
|
||||
|
||||
|
||||
def create_structured_output_runnable(
|
||||
output_schema: Union[Dict[str, Any], Type[BaseModel]],
|
||||
llm: Runnable,
|
||||
prompt: BasePromptTemplate,
|
||||
*,
|
||||
output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Runnable:
|
||||
"""Create a runnable that uses an OpenAI function to get a structured output.
|
||||
|
||||
Args:
|
||||
output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
|
||||
is passed in, it's assumed to already be a valid JsonSchema.
|
||||
For best results, pydantic.BaseModels should have docstrings describing what
|
||||
the schema represents and descriptions for the parameters.
|
||||
llm: Language model to use, assumed to support the OpenAI function-calling API.
|
||||
prompt: BasePromptTemplate to pass to the model.
|
||||
output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
|
||||
will be inferred from the function types. If pydantic.BaseModels are passed
|
||||
in, then the OutputParser will try to parse outputs using those. Otherwise
|
||||
model outputs will simply be parsed as JSON.
|
||||
|
||||
Returns:
|
||||
A runnable sequence that will pass the given function to the model when run.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langchain.chains.openai_functions import create_structured_output_runnable
|
||||
from langchain_community.chat_models import ChatOpenAI
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
|
||||
class Dog(BaseModel):
|
||||
\"\"\"Identifying information about a dog.\"\"\"
|
||||
|
||||
name: str = Field(..., description="The dog's name")
|
||||
color: str = Field(..., description="The dog's color")
|
||||
fav_food: Optional[str] = Field(None, description="The dog's favorite food")
|
||||
|
||||
llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0)
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
("system", "You are a world class algorithm for extracting information in structured formats."),
|
||||
("human", "Use the given format to extract information from the following input: {input}"),
|
||||
("human", "Tip: Make sure to answer in the correct format"),
|
||||
]
|
||||
)
|
||||
chain = create_structured_output_runnable(Dog, llm, prompt)
|
||||
chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
|
||||
# -> Dog(name="Harry", color="brown", fav_food="chicken")
|
||||
""" # noqa: E501
|
||||
if isinstance(output_schema, dict):
|
||||
function: Any = {
|
||||
"name": "output_formatter",
|
||||
"description": (
|
||||
"Output formatter. Should always be used to format your response to the"
|
||||
" user."
|
||||
),
|
||||
"parameters": output_schema,
|
||||
}
|
||||
else:
|
||||
|
||||
class _OutputFormatter(BaseModel):
|
||||
"""Output formatter. Should always be used to format your response to the user.""" # noqa: E501
|
||||
|
||||
output: output_schema # type: ignore
|
||||
|
||||
function = _OutputFormatter
|
||||
output_parser = output_parser or PydanticAttrOutputFunctionsParser(
|
||||
pydantic_schema=_OutputFormatter, attr_name="output"
|
||||
)
|
||||
return create_openai_fn_runnable(
|
||||
[function],
|
||||
llm,
|
||||
prompt,
|
||||
output_parser=output_parser,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
""" --- Legacy --- """
|
||||
__all__ = [
|
||||
"get_openai_output_parser",
|
||||
"create_openai_fn_runnable",
|
||||
"create_structured_output_runnable",
|
||||
"create_openai_fn_chain", # deprecated
|
||||
"create_structured_output_chain", # deprecated
|
||||
"PYTHON_TO_JSON_TYPES", # backwards compatibility
|
||||
"convert_to_openai_function", # backwards compatibility
|
||||
]
|
||||
|
||||
|
||||
@deprecated(since="0.1.1", removal="0.2.0", alternative="create_openai_fn_runnable")
|
||||
@ -426,14 +231,3 @@ def create_structured_output_chain(
|
||||
output_parser=output_parser,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"create_openai_fn_chain",
|
||||
"create_openai_fn_runnable",
|
||||
"create_structured_output_chain",
|
||||
"create_structured_output_runnable",
|
||||
"get_openai_output_parser",
|
||||
"PYTHON_TO_JSON_TYPES",
|
||||
"convert_to_openai_function",
|
||||
]
|
||||
|
@ -0,0 +1,6 @@
|
||||
from langchain.chains.structured_output.base import (
|
||||
create_openai_fn_runnable,
|
||||
create_structured_output_runnable,
|
||||
)
|
||||
|
||||
__all__ = ["create_structured_output_runnable", "create_openai_fn_runnable"]
|
321
libs/langchain/langchain/chains/structured_output/base.py
Normal file
321
libs/langchain/langchain/chains/structured_output/base.py
Normal file
@ -0,0 +1,321 @@
|
||||
import json
|
||||
from typing import Any, Callable, Dict, Literal, Optional, Sequence, Type, Union
|
||||
|
||||
from langchain_core.output_parsers import (
|
||||
BaseGenerationOutputParser,
|
||||
BaseOutputParser,
|
||||
JsonOutputParser,
|
||||
)
|
||||
from langchain_core.prompts import BasePromptTemplate
|
||||
from langchain_core.pydantic_v1 import BaseModel
|
||||
from langchain_core.runnables import Runnable
|
||||
from langchain_core.utils.function_calling import convert_to_openai_function
|
||||
|
||||
from langchain.output_parsers import PydanticOutputParser
|
||||
from langchain.output_parsers.openai_functions import (
|
||||
JsonOutputFunctionsParser,
|
||||
PydanticAttrOutputFunctionsParser,
|
||||
PydanticOutputFunctionsParser,
|
||||
)
|
||||
|
||||
|
||||
def create_openai_fn_runnable(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
    llm: Runnable,
    prompt: BasePromptTemplate,
    *,
    enforce_single_function_usage: bool = True,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    **kwargs: Any,
) -> Runnable:
    """Compose a prompt, a function-calling model and a parser into one runnable.

    Every entry of ``functions`` is converted with ``convert_to_openai_function``
    and the resulting list is bound to ``llm`` as the ``functions`` argument,
    together with any extra ``kwargs``.

    Args:
        functions: Dictionaries, pydantic.BaseModel classes, or Python functions
            describing what the model may call. Dictionaries are assumed to
            already be valid OpenAI function definitions. pydantic.BaseModels and
            Python functions should have docstrings describing what the function
            does. For best results, pydantic.BaseModels should describe their
            parameters and Python functions should use Google Python style args
            descriptions. Python functions should only take primitive types
            (str, int, float, bool) or pydantic.BaseModels as arguments.
        llm: Language model to use, assumed to support the OpenAI
            function-calling API.
        prompt: BasePromptTemplate to pass to the model.
        enforce_single_function_usage: Only relevant when exactly one function is
            passed in. If True, ``function_call`` is bound to that function's name
            so the model is forced to use it. If False, the model may choose
            whether to use it.
        output_parser: Parser applied to the model output. When omitted, it is
            chosen by ``get_openai_output_parser(functions)``: pydantic.BaseModels
            are parsed into those classes, anything else is parsed as JSON. With
            several non-pydantic functions the output includes both the returned
            function name and its arguments.
        **kwargs: Extra keyword arguments bound to ``llm`` alongside
            ``functions``.

    Returns:
        A runnable sequence that will pass in the given functions to the model
        when run.

    Raises:
        ValueError: If ``functions`` is empty.

    Example:
        .. code-block:: python

                from typing import Optional

                from langchain.chains.structured_output import create_openai_fn_runnable
                from langchain_openai import ChatOpenAI
                from langchain_core.prompts import ChatPromptTemplate
                from langchain_core.pydantic_v1 import BaseModel, Field


                class RecordPerson(BaseModel):
                    '''Record some identifying information about a person.'''

                    name: str = Field(..., description="The person's name")
                    age: int = Field(..., description="The person's age")
                    fav_food: Optional[str] = Field(None, description="The person's favorite food")


                class RecordDog(BaseModel):
                    '''Record some identifying information about a dog.'''

                    name: str = Field(..., description="The dog's name")
                    color: str = Field(..., description="The dog's color")
                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")


                llm = ChatOpenAI(model="gpt-4", temperature=0)
                prompt = ChatPromptTemplate.from_messages(
                    [
                        ("system", "You are a world class algorithm for recording entities."),
                        ("human", "Make calls to the relevant function to record the entities in the following input: {input}"),
                        ("human", "Tip: Make sure to answer in the correct format"),
                    ]
                )
                chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt)
                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
                # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
    """  # noqa: E501
    if not functions:
        raise ValueError("Need to pass in at least one function. Received zero.")

    function_specs = list(map(convert_to_openai_function, functions))
    bind_kwargs: Dict[str, Any] = {"functions": function_specs, **kwargs}

    # With a single function, optionally pin the model to calling exactly it.
    if enforce_single_function_usage and len(function_specs) == 1:
        bind_kwargs["function_call"] = {"name": function_specs[0]["name"]}

    parser = output_parser or get_openai_output_parser(functions)
    bound_llm = llm.bind(**bind_kwargs)
    return prompt | bound_llm | parser
|
||||
|
||||
|
||||
# TODO: implement mode='openai-tools'.
|
||||
def create_structured_output_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: BasePromptTemplate,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    mode: Literal["openai-functions", "openai-json"] = "openai-functions",
    enforce_single_function_usage: bool = True,
    **kwargs: Any,
) -> Runnable:
    """Build a runnable that extracts output conforming to ``output_schema``.

    This is a dispatcher: depending on ``mode`` it delegates to the OpenAI
    function-calling implementation or to the OpenAI JSON response-format
    implementation.

    Args:
        output_schema: Either a dictionary or pydantic.BaseModel class. If a
            dictionary is passed in, it's assumed to already be a valid
            JsonSchema. For best results, pydantic.BaseModels should have
            docstrings describing what the schema represents and descriptions
            for the parameters.
        llm: Language model to use. Assumed to support the OpenAI
            function-calling API if mode is 'openai-functions'. Assumed to
            support the OpenAI response_format parameter if mode is
            'openai-json'.
        prompt: BasePromptTemplate to pass to the model. If mode is 'openai-json'
            and the prompt has an input variable 'output_schema', the given
            output_schema is converted to a JsonSchema and inserted in the
            prompt.
        output_parser: Output parser to use for parsing model outputs. By default
            it is inferred from the schema: a pydantic.BaseModel is parsed with
            the pydantic class, anything else is parsed as JSON.
        mode: How structured outputs are extracted from the model. If
            'openai-functions' then OpenAI function calling is used. If
            'openai-json' then an OpenAI model with response_format set to JSON
            is used.
        enforce_single_function_usage: Only used if mode is 'openai-functions'.
            If True, the model will be forced to use the given function. If
            False, the model will be given the option to use the given function
            or not.
        **kwargs: Additional named arguments, forwarded to the selected
            implementation.

    Returns:
        A runnable sequence that will return a structured output matching the
        given output_schema.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.

    OpenAI functions example:
        .. code-block:: python

                from typing import Optional

                from langchain.chains.structured_output import create_structured_output_runnable
                from langchain_openai import ChatOpenAI
                from langchain_core.prompts import ChatPromptTemplate
                from langchain_core.pydantic_v1 import BaseModel, Field

                class Dog(BaseModel):
                    '''Identifying information about a dog.'''

                    name: str = Field(..., description="The dog's name")
                    color: str = Field(..., description="The dog's color")
                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")

                llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
                prompt = ChatPromptTemplate.from_messages(
                    [
                        ("system", "You are a world class algorithm for extracting information in structured formats."),
                        ("human", "Use the given format to extract information from the following input: {input}"),
                        ("human", "Tip: Make sure to answer in the correct format"),
                    ]
                )
                chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-functions")
                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
                # -> Dog(name="Harry", color="brown", fav_food="chicken")

    OpenAI json response format example:
        .. code-block:: python

                from typing import Optional

                from langchain.chains.structured_output import create_structured_output_runnable
                from langchain_openai import ChatOpenAI
                from langchain_core.prompts import ChatPromptTemplate
                from langchain_core.pydantic_v1 import BaseModel, Field

                class Dog(BaseModel):
                    '''Identifying information about a dog.'''

                    name: str = Field(..., description="The dog's name")
                    color: str = Field(..., description="The dog's color")
                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")

                llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
                system = '''You are a world class assistant for extracting information in structured JSON formats. \

                Extract a valid JSON blob from the user input that matches the following JSON Schema:

                {output_schema}'''
                prompt = ChatPromptTemplate.from_messages(
                    [
                        ("system", system),
                        ("human", "{input}"),
                    ]
                )
                chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-json")
                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
    """  # noqa: E501
    if mode == "openai-json":
        # JSON mode has no notion of function enforcement, so that flag is not
        # forwarded here.
        return _create_openai_json_runnable(
            output_schema, llm, prompt, output_parser=output_parser, **kwargs
        )

    if mode == "openai-functions":
        return _create_openai_functions_structured_output_runnable(
            output_schema,
            llm,
            prompt,
            output_parser=output_parser,
            enforce_single_function_usage=enforce_single_function_usage,
            **kwargs,
        )

    raise ValueError(
        f"Invalid mode {mode}. Expected one of 'openai-functions', "
        f"'openai-json'."
    )
|
||||
|
||||
|
||||
def get_openai_output_parser(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
    """Pick the function-call output parser that matches the given functions.

    Only the first element of ``functions`` is inspected to decide between the
    pydantic and the JSON parser.

    Args:
        functions: Sequence whose elements are dictionaries, pydantic.BaseModel
            classes, or Python functions. A dictionary is assumed to already be
            a valid OpenAI function.

    Returns:
        A PydanticOutputFunctionsParser if the first function is a pydantic
        class: built from that class when it is the only function, otherwise
        from a mapping of function name to function. In every other case a
        JsonOutputFunctionsParser, created with ``args_only=True`` when there is
        at most one function, so that only the function arguments are extracted
        and not the function name.
    """
    # Names are resolved for every function up front, so an entry that cannot
    # be converted fails here regardless of which parser ends up being used.
    names = [convert_to_openai_function(fn)["name"] for fn in functions]

    first = functions[0]
    if not (isinstance(first, type) and issubclass(first, BaseModel)):
        return JsonOutputFunctionsParser(args_only=len(functions) <= 1)

    schema: Union[Dict, Type[BaseModel]] = (
        dict(zip(names, functions)) if len(functions) > 1 else first
    )
    return PydanticOutputFunctionsParser(pydantic_schema=schema)
|
||||
|
||||
|
||||
def _create_openai_json_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: BasePromptTemplate,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    **kwargs: Any,
) -> Runnable:
    """Create a structured-output runnable based on the OpenAI JSON response format.

    Args:
        output_schema: Either a pydantic.BaseModel class or a dictionary. A
            dictionary is used as the schema as-is; a pydantic class is
            converted via ``convert_to_openai_function(...)["parameters"]``.
        llm: Language model to use; ``response_format={"type": "json_object"}``
            is bound to it.
        prompt: BasePromptTemplate to pass to the model. If it declares an
            ``output_schema`` input variable, that variable is pre-filled with
            the schema serialized as indented JSON.
        output_parser: Parser for the model output. Defaults to a
            PydanticOutputParser for ``output_schema`` when it is a pydantic
            class, otherwise to a JsonOutputParser.
        **kwargs: Extra keyword arguments bound to ``llm`` together with
            ``response_format``. ``create_structured_output_runnable`` forwards
            its ``**kwargs`` here, so they must be accepted rather than raising
            ``TypeError``.

    Returns:
        The runnable sequence ``prompt | llm | output_parser``.
    """
    if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
        output_parser = output_parser or PydanticOutputParser(
            pydantic_object=output_schema,
        )
        schema_as_dict = convert_to_openai_function(output_schema)["parameters"]
    else:
        output_parser = output_parser or JsonOutputParser()
        schema_as_dict = output_schema

    # Only fill in the schema when the prompt actually asks for it.
    if "output_schema" in prompt.input_variables:
        prompt = prompt.partial(output_schema=json.dumps(schema_as_dict, indent=2))

    # Same layering as create_openai_fn_runnable: the mode's own setting comes
    # first and caller-supplied kwargs are merged on top of it.
    llm_kwargs: Dict[str, Any] = {
        "response_format": {"type": "json_object"},
        **kwargs,
    }
    llm = llm.bind(**llm_kwargs)
    return prompt | llm | output_parser
|
||||
|
||||
|
||||
def _create_openai_functions_structured_output_runnable(
    output_schema: Union[Dict[str, Any], Type[BaseModel]],
    llm: Runnable,
    prompt: BasePromptTemplate,
    *,
    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
    **kwargs: Any,
) -> Runnable:
    """Create a structured-output runnable based on OpenAI function calling.

    The schema is wrapped as a single "output formatter" function and handed to
    ``create_openai_fn_runnable``.

    Args:
        output_schema: A dictionary, used directly as the ``parameters`` of a
            function named ``output_formatter``, or a pydantic.BaseModel class,
            which becomes the type of the ``output`` field of a wrapper model.
        llm: Language model passed through to ``create_openai_fn_runnable``.
        prompt: BasePromptTemplate passed through to
            ``create_openai_fn_runnable``.
        output_parser: Parser for the model output. For a non-dictionary schema
            it defaults to a PydanticAttrOutputFunctionsParser configured with
            the wrapper model and ``attr_name="output"``. For a dictionary schema
            it is passed on unchanged, ``None`` included.
        **kwargs: Forwarded to ``create_openai_fn_runnable``.

    Returns:
        The runnable produced by ``create_openai_fn_runnable``.
    """
    formatter_fn: Any
    if not isinstance(output_schema, dict):

        class _OutputFormatter(BaseModel):
            """Output formatter. Should always be used to format your response to the user."""  # noqa: E501

            output: output_schema  # type: ignore

        formatter_fn = _OutputFormatter
        if not output_parser:
            output_parser = PydanticAttrOutputFunctionsParser(
                pydantic_schema=_OutputFormatter, attr_name="output"
            )
    else:
        description = (
            "Output formatter. Should always be used to format your response to the"
            " user."
        )
        formatter_fn = {
            "name": "output_formatter",
            "description": description,
            "parameters": output_schema,
        }

    return create_openai_fn_runnable(
        [formatter_fn],
        llm,
        prompt,
        output_parser=output_parser,
        **kwargs,
    )
|
@ -1,42 +1,32 @@
|
||||
import json
|
||||
import re
|
||||
from typing import Type, TypeVar
|
||||
from typing import Any, List, Type
|
||||
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.output_parsers import BaseOutputParser
|
||||
from langchain_core.output_parsers import JsonOutputParser
|
||||
from langchain_core.outputs import Generation
|
||||
from langchain_core.pydantic_v1 import BaseModel, ValidationError
|
||||
|
||||
from langchain.output_parsers.format_instructions import PYDANTIC_FORMAT_INSTRUCTIONS
|
||||
|
||||
T = TypeVar("T", bound=BaseModel)
|
||||
|
||||
|
||||
class PydanticOutputParser(BaseOutputParser[T]):
|
||||
class PydanticOutputParser(JsonOutputParser):
|
||||
"""Parse an output using a pydantic model."""
|
||||
|
||||
pydantic_object: Type[T]
|
||||
pydantic_object: Type[BaseModel]
|
||||
"""The pydantic model to parse.
|
||||
|
||||
Attention: To avoid potential compatibility issues, it's recommended to use
|
||||
pydantic <2 or leverage the v1 namespace in pydantic >= 2.
|
||||
"""
|
||||
|
||||
def parse(self, text: str) -> T:
|
||||
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
|
||||
json_object = super().parse_result(result)
|
||||
try:
|
||||
# Greedy search for 1st json candidate.
|
||||
match = re.search(
|
||||
r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
|
||||
)
|
||||
json_str = ""
|
||||
if match:
|
||||
json_str = match.group()
|
||||
json_object = json.loads(json_str, strict=False)
|
||||
return self.pydantic_object.parse_obj(json_object)
|
||||
|
||||
except (json.JSONDecodeError, ValidationError) as e:
|
||||
except ValidationError as e:
|
||||
name = self.pydantic_object.__name__
|
||||
msg = f"Failed to parse {name} from completion {text}. Got: {e}"
|
||||
raise OutputParserException(msg, llm_output=text)
|
||||
msg = f"Failed to parse {name} from completion {json_object}. Got: {e}"
|
||||
raise OutputParserException(msg, llm_output=json_object)
|
||||
|
||||
def get_format_instructions(self) -> str:
|
||||
schema = self.pydantic_object.schema()
|
||||
@ -57,6 +47,6 @@ class PydanticOutputParser(BaseOutputParser[T]):
|
||||
return "pydantic"
|
||||
|
||||
@property
|
||||
def OutputType(self) -> Type[T]:
|
||||
def OutputType(self) -> Type[BaseModel]:
|
||||
"""Return the pydantic model."""
|
||||
return self.pydantic_object
|
||||
|
@ -53,7 +53,7 @@ DEF_EXPECTED_RESULT = TestModel(
|
||||
def test_pydantic_output_parser() -> None:
|
||||
"""Test PydanticOutputParser."""
|
||||
|
||||
pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser(
|
||||
pydantic_parser: PydanticOutputParser = PydanticOutputParser(
|
||||
pydantic_object=TestModel
|
||||
)
|
||||
|
||||
@ -65,7 +65,7 @@ def test_pydantic_output_parser() -> None:
|
||||
def test_pydantic_output_parser_fail() -> None:
|
||||
"""Test PydanticOutputParser where completion result fails schema validation."""
|
||||
|
||||
pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser(
|
||||
pydantic_parser: PydanticOutputParser = PydanticOutputParser(
|
||||
pydantic_object=TestModel
|
||||
)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user