langchain[minor], core[minor]: update json, pydantic parser. add openai-json structured output runnable (#16914)
parent e22c4d4eb0, commit 852973d616
@@ -35,7 +35,7 @@ def _custom_parser(multiline_string: str) -> str:
         multiline_string = multiline_string.decode()
 
     multiline_string = re.sub(
-        r'("action_input"\:\s*")(.*)(")',
+        r'("action_input"\:\s*")(.*?)(")',
         _replace_new_line,
         multiline_string,
         flags=re.DOTALL,
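The only change in this hunk is the lazy quantifier. A standalone sketch (hypothetical input, not from the repo) of why `(.*)` with `re.DOTALL` overshoots while `(.*?)` stops at the first closing quote:

```python
import re

blob = '{"action_input": "line1\\nline2", "note": "tail"}'

greedy = re.search(r'("action_input"\:\s*")(.*)(")', blob, re.DOTALL)
lazy = re.search(r'("action_input"\:\s*")(.*?)(")', blob, re.DOTALL)

# Greedy runs to the LAST quote in the string, swallowing the next field:
print(greedy.group(2))  # line1\nline2", "note": "tail
# Lazy stops at the first closing quote, isolating just the value:
print(lazy.group(2))    # line1\nline2
```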
@@ -138,7 +138,7 @@ def parse_json_markdown(
         The parsed JSON object as a Python dictionary.
     """
     # Try to find JSON string within triple backticks
-    match = re.search(r"```(json)?(.*)(```)?", json_string, re.DOTALL)
+    match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
 
     # If no match found, assume the entire string is a JSON string
     if match is None:
@@ -148,7 +148,7 @@ def parse_json_markdown(
         json_str = match.group(2)
 
         # Strip whitespace and newlines from the start and end
-        json_str = json_str.strip()
+        json_str = json_str.strip().strip("`")
 
     # handle newlines and other special characters inside the returned value
     json_str = _custom_parser(json_str)
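Taken together, the two hunks above trade the optional-closing-fence regex for a simpler capture plus a backtick strip. A quick sketch (hypothetical input) of the round trip:

```python
import re

json_string = '```json\n{"foo": "bar"}\n```'
match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
json_str = match.group(2)           # '\n{"foo": "bar"}\n```'  (trailing fence is captured)
json_str = json_str.strip().strip("`")
print(repr(json_str))               # '{"foo": "bar"}\n' -- json.loads tolerates the newline
```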
@@ -211,7 +211,8 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
         try:
             return parse_json_markdown(text)
         except JSONDecodeError as e:
-            raise OutputParserException(f"Invalid json output: {text}") from e
+            msg = f"Invalid json output: {text}"
+            raise OutputParserException(msg, llm_output=text) from e
 
     def parse(self, text: str) -> Any:
         return self.parse_result([Generation(text=text)])
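A minimal sketch of what the added `llm_output` argument buys downstream consumers, e.g. retry or fixing parsers that want the raw completion back (assumes only `langchain_core`):

```python
from langchain_core.exceptions import OutputParserException

try:
    raise OutputParserException("Invalid json output: oops", llm_output="oops")
except OutputParserException as e:
    # The raw model text now travels with the exception instead of only
    # being embedded in the message string.
    print(e.llm_output)  # oops
```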
@@ -70,21 +70,7 @@ JSON_WITH_MARKDOWN_CODE_BLOCK = """```json
 JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
 {
     "action": "Final Answer",
-    "action_input": "```bar\n<div id="1" class=\"value\">\n\ttext\n</div>```"
-}
-```"""
-
-JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
-{
-    "action": "Final Answer",
-    "action_input": "{"foo": "bar", "bar": "foo"}"
-}
-```"""
-
-JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
-{
-    "action": "Final Answer",
-    "action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
+    "action_input": "```bar\n<div id=\\"1\\" class=\\"value\\">\n\ttext\n</div>```"
 }
 ```"""
 
@@ -202,6 +188,8 @@ def test_parse_json_with_code_blocks() -> None:
     parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK)
     assert parsed == {"foo": "```bar```"}
 
+
+def test_parse_json_with_code_blocks_and_newlines() -> None:
     parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES)
 
     assert parsed == {
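For orientation, the new test exercises the NEWLINES constant defined earlier; the expected dict is cut off by the hunk boundary, but the gist (a sketch, not the verbatim assertion) is that `_custom_parser` turns the raw newlines inside the string value into valid JSON escapes before loading:

```python
parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES)
assert parsed["action"] == "Final Answer"
# The literal newline/tab characters inside the fenced block survive the
# round trip as real "\n" / "\t" characters in the parsed value:
assert "\n" in parsed["action_input"] and "\t" in parsed["action_input"]
```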
@@ -211,8 +199,6 @@ def test_parse_json_with_code_blocks() -> None:
 
 
 TEST_CASES_ESCAPED_QUOTES = [
-    JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
-    JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
     JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
 ]
 
@@ -1,10 +1,7 @@
 from langchain.chains.openai_functions.base import (
     convert_to_openai_function,
     create_openai_fn_chain,
-    create_openai_fn_runnable,
     create_structured_output_chain,
-    create_structured_output_runnable,
-    get_openai_output_parser,
 )
 from langchain.chains.openai_functions.citation_fuzzy_match import (
     create_citation_fuzzy_match_chain,
@@ -21,6 +18,11 @@ from langchain.chains.openai_functions.tagging import (
     create_tagging_chain,
     create_tagging_chain_pydantic,
 )
+from langchain.chains.structured_output.base import (
+    create_openai_fn_runnable,
+    create_structured_output_runnable,
+    get_openai_output_parser,
+)
 
 __all__ = [
     "convert_to_openai_function",
@@ -33,7 +35,7 @@ __all__ = [
     "create_qa_with_sources_chain",
     "create_structured_output_chain",
     "create_openai_fn_chain",
-    "create_structured_output_runnable",
-    "create_openai_fn_runnable",
-    "get_openai_output_parser",
+    "create_structured_output_runnable",  # backwards compatibility
+    "create_openai_fn_runnable",  # backwards compatibility
+    "get_openai_output_parser",  # backwards compatibility
 ]
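The three helpers keep their old import path via the re-export above, so existing user code keeps working. A sketch of the invariant this preserves:

```python
from langchain.chains.openai_functions import create_structured_output_runnable as old
from langchain.chains.structured_output.base import create_structured_output_runnable as new

assert old is new  # re-exported, not copied
```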
@@ -12,229 +12,34 @@ from typing import (
 from langchain_core._api import deprecated
 from langchain_core.language_models import BaseLanguageModel
 from langchain_core.output_parsers import (
-    BaseGenerationOutputParser,
     BaseLLMOutputParser,
-    BaseOutputParser,
 )
 from langchain_core.prompts import BasePromptTemplate
 from langchain_core.pydantic_v1 import BaseModel
-from langchain_core.runnables import Runnable
 from langchain_core.utils.function_calling import (
     PYTHON_TO_JSON_TYPES,
     convert_to_openai_function,
 )
 
 from langchain.chains import LLMChain
+from langchain.chains.structured_output.base import (
+    create_openai_fn_runnable,
+    create_structured_output_runnable,
+    get_openai_output_parser,
+)
 from langchain.output_parsers.openai_functions import (
-    JsonOutputFunctionsParser,
     PydanticAttrOutputFunctionsParser,
-    PydanticOutputFunctionsParser,
 )
 
-
-def get_openai_output_parser(
-    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
-) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
-    """Get the appropriate function output parser given the user functions.
-
-    Args:
-        functions: Sequence where element is a dictionary, a pydantic.BaseModel class,
-            or a Python function. If a dictionary is passed in, it is assumed to
-            already be a valid OpenAI function.
-
-    Returns:
-        A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise
-        a JsonOutputFunctionsParser. If there's only one function and it is
-        not a Pydantic class, then the output parser will automatically extract
-        only the function arguments and not the function name.
-    """
-    function_names = [convert_to_openai_function(f)["name"] for f in functions]
-    if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
-        if len(functions) > 1:
-            pydantic_schema: Union[Dict, Type[BaseModel]] = {
-                name: fn for name, fn in zip(function_names, functions)
-            }
-        else:
-            pydantic_schema = functions[0]
-        output_parser: Union[
-            BaseOutputParser, BaseGenerationOutputParser
-        ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
-    else:
-        output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
-    return output_parser
-
-
-def create_openai_fn_runnable(
-    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
-    llm: Runnable,
-    prompt: BasePromptTemplate,
-    *,
-    enforce_single_function_usage: bool = True,
-    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
-    **kwargs: Any,
-) -> Runnable:
-    """Create a runnable sequence that uses OpenAI functions.
-
-    Args:
-        functions: A sequence of either dictionaries, pydantic.BaseModels classes, or
-            Python functions. If dictionaries are passed in, they are assumed to
-            already be a valid OpenAI functions. If only a single
-            function is passed in, then it will be enforced that the model use that
-            function. pydantic.BaseModels and Python functions should have docstrings
-            describing what the function does. For best results, pydantic.BaseModels
-            should have descriptions of the parameters and Python functions should have
-            Google Python style args descriptions in the docstring. Additionally,
-            Python functions should only use primitive types (str, int, float, bool) or
-            pydantic.BaseModels for arguments.
-        llm: Language model to use, assumed to support the OpenAI function-calling API.
-        prompt: BasePromptTemplate to pass to the model.
-        enforce_single_function_usage: only used if a single function is passed in. If
-            True, then the model will be forced to use the given function. If False,
-            then the model will be given the option to use the given function or not.
-        output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
-            will be inferred from the function types. If pydantic.BaseModels are passed
-            in, then the OutputParser will try to parse outputs using those. Otherwise
-            model outputs will simply be parsed as JSON. If multiple functions are
-            passed in and they are not pydantic.BaseModels, the chain output will
-            include both the name of the function that was returned and the arguments
-            to pass to the function.
-
-    Returns:
-        A runnable sequence that will pass in the given functions to the model when run.
-
-    Example:
-        .. code-block:: python
-
-            from typing import Optional
-
-            from langchain.chains.openai_functions import create_openai_fn_runnable
-            from langchain_community.chat_models import ChatOpenAI
-            from langchain_core.prompts import ChatPromptTemplate
-            from langchain_core.pydantic_v1 import BaseModel, Field
-
-
-            class RecordPerson(BaseModel):
-                \"\"\"Record some identifying information about a person.\"\"\"
-
-                name: str = Field(..., description="The person's name")
-                age: int = Field(..., description="The person's age")
-                fav_food: Optional[str] = Field(None, description="The person's favorite food")
-
-
-            class RecordDog(BaseModel):
-                \"\"\"Record some identifying information about a dog.\"\"\"
-
-                name: str = Field(..., description="The dog's name")
-                color: str = Field(..., description="The dog's color")
-                fav_food: Optional[str] = Field(None, description="The dog's favorite food")
-
-
-            llm = ChatOpenAI(model="gpt-4", temperature=0)
-            prompt = ChatPromptTemplate.from_messages(
-                [
-                    ("system", "You are a world class algorithm for recording entities."),
-                    ("human", "Make calls to the relevant function to record the entities in the following input: {input}"),
-                    ("human", "Tip: Make sure to answer in the correct format"),
-                ]
-            )
-            chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt)
-            chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
-            # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
-    """  # noqa: E501
-    if not functions:
-        raise ValueError("Need to pass in at least one function. Received zero.")
-    openai_functions = [convert_to_openai_function(f) for f in functions]
-    llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs}
-    if len(openai_functions) == 1 and enforce_single_function_usage:
-        llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
-    output_parser = output_parser or get_openai_output_parser(functions)
-    return prompt | llm.bind(**llm_kwargs) | output_parser
-
-
-def create_structured_output_runnable(
-    output_schema: Union[Dict[str, Any], Type[BaseModel]],
-    llm: Runnable,
-    prompt: BasePromptTemplate,
-    *,
-    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
-    **kwargs: Any,
-) -> Runnable:
-    """Create a runnable that uses an OpenAI function to get a structured output.
-
-    Args:
-        output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
-            is passed in, it's assumed to already be a valid JsonSchema.
-            For best results, pydantic.BaseModels should have docstrings describing what
-            the schema represents and descriptions for the parameters.
-        llm: Language model to use, assumed to support the OpenAI function-calling API.
-        prompt: BasePromptTemplate to pass to the model.
-        output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
-            will be inferred from the function types. If pydantic.BaseModels are passed
-            in, then the OutputParser will try to parse outputs using those. Otherwise
-            model outputs will simply be parsed as JSON.
-
-    Returns:
-        A runnable sequence that will pass the given function to the model when run.
-
-    Example:
-        .. code-block:: python
-
-            from typing import Optional
-
-            from langchain.chains.openai_functions import create_structured_output_runnable
-            from langchain_community.chat_models import ChatOpenAI
-            from langchain_core.prompts import ChatPromptTemplate
-            from langchain_core.pydantic_v1 import BaseModel, Field
-
-            class Dog(BaseModel):
-                \"\"\"Identifying information about a dog.\"\"\"
-
-                name: str = Field(..., description="The dog's name")
-                color: str = Field(..., description="The dog's color")
-                fav_food: Optional[str] = Field(None, description="The dog's favorite food")
-
-            llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0)
-            prompt = ChatPromptTemplate.from_messages(
-                [
-                    ("system", "You are a world class algorithm for extracting information in structured formats."),
-                    ("human", "Use the given format to extract information from the following input: {input}"),
-                    ("human", "Tip: Make sure to answer in the correct format"),
-                ]
-            )
-            chain = create_structured_output_runnable(Dog, llm, prompt)
-            chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
-            # -> Dog(name="Harry", color="brown", fav_food="chicken")
-    """  # noqa: E501
-    if isinstance(output_schema, dict):
-        function: Any = {
-            "name": "output_formatter",
-            "description": (
-                "Output formatter. Should always be used to format your response to the"
-                " user."
-            ),
-            "parameters": output_schema,
-        }
-    else:
-
-        class _OutputFormatter(BaseModel):
-            """Output formatter. Should always be used to format your response to the user."""  # noqa: E501
-
-            output: output_schema  # type: ignore
-
-        function = _OutputFormatter
-        output_parser = output_parser or PydanticAttrOutputFunctionsParser(
-            pydantic_schema=_OutputFormatter, attr_name="output"
-        )
-    return create_openai_fn_runnable(
-        [function],
-        llm,
-        prompt,
-        output_parser=output_parser,
-        **kwargs,
-    )
-
-
-""" --- Legacy --- """
-
-
+__all__ = [
+    "get_openai_output_parser",
+    "create_openai_fn_runnable",
+    "create_structured_output_runnable",
+    "create_openai_fn_chain",  # deprecated
+    "create_structured_output_chain",  # deprecated
+    "PYTHON_TO_JSON_TYPES",  # backwards compatibility
+    "convert_to_openai_function",  # backwards compatibility
+]
+
+
 @deprecated(since="0.1.1", removal="0.2.0", alternative="create_openai_fn_runnable")
@@ -426,14 +231,3 @@ def create_structured_output_chain(
         output_parser=output_parser,
         **kwargs,
     )
-
-
-__all__ = [
-    "create_openai_fn_chain",
-    "create_openai_fn_runnable",
-    "create_structured_output_chain",
-    "create_structured_output_runnable",
-    "get_openai_output_parser",
-    "PYTHON_TO_JSON_TYPES",
-    "convert_to_openai_function",
-]
@@ -0,0 +1,6 @@
+from langchain.chains.structured_output.base import (
+    create_openai_fn_runnable,
+    create_structured_output_runnable,
+)
+
+__all__ = ["create_structured_output_runnable", "create_openai_fn_runnable"]
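Assuming this hunk is the new `libs/langchain/langchain/chains/structured_output/__init__.py` (the path was lost in extraction, but the relative import implies it), the constructors become importable at package level, which is the path the docstrings below use:

```python
from langchain.chains.structured_output import (
    create_openai_fn_runnable,
    create_structured_output_runnable,
)
```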
libs/langchain/langchain/chains/structured_output/base.py (new file, 321 lines)
@@ -0,0 +1,321 @@
+import json
+from typing import Any, Callable, Dict, Literal, Optional, Sequence, Type, Union
+
+from langchain_core.output_parsers import (
+    BaseGenerationOutputParser,
+    BaseOutputParser,
+    JsonOutputParser,
+)
+from langchain_core.prompts import BasePromptTemplate
+from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.runnables import Runnable
+from langchain_core.utils.function_calling import convert_to_openai_function
+
+from langchain.output_parsers import PydanticOutputParser
+from langchain.output_parsers.openai_functions import (
+    JsonOutputFunctionsParser,
+    PydanticAttrOutputFunctionsParser,
+    PydanticOutputFunctionsParser,
+)
+
+
+def create_openai_fn_runnable(
+    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    enforce_single_function_usage: bool = True,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+    **kwargs: Any,
+) -> Runnable:
+    """Create a runnable sequence that uses OpenAI functions.
+
+    Args:
+        functions: A sequence of either dictionaries, pydantic.BaseModels classes, or
+            Python functions. If dictionaries are passed in, they are assumed to
+            already be a valid OpenAI functions. If only a single
+            function is passed in, then it will be enforced that the model use that
+            function. pydantic.BaseModels and Python functions should have docstrings
+            describing what the function does. For best results, pydantic.BaseModels
+            should have descriptions of the parameters and Python functions should have
+            Google Python style args descriptions in the docstring. Additionally,
+            Python functions should only use primitive types (str, int, float, bool) or
+            pydantic.BaseModels for arguments.
+        llm: Language model to use, assumed to support the OpenAI function-calling API.
+        prompt: BasePromptTemplate to pass to the model.
+        enforce_single_function_usage: only used if a single function is passed in. If
+            True, then the model will be forced to use the given function. If False,
+            then the model will be given the option to use the given function or not.
+        output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
+            will be inferred from the function types. If pydantic.BaseModels are passed
+            in, then the OutputParser will try to parse outputs using those. Otherwise
+            model outputs will simply be parsed as JSON. If multiple functions are
+            passed in and they are not pydantic.BaseModels, the chain output will
+            include both the name of the function that was returned and the arguments
+            to pass to the function.
+
+    Returns:
+        A runnable sequence that will pass in the given functions to the model when run.
+
+    Example:
+        .. code-block:: python
+
+            from typing import Optional
+
+            from langchain.chains.structured_output import create_openai_fn_runnable
+            from langchain_openai import ChatOpenAI
+            from langchain_core.prompts import ChatPromptTemplate
+            from langchain_core.pydantic_v1 import BaseModel, Field
+
+
+            class RecordPerson(BaseModel):
+                '''Record some identifying information about a person.'''
+
+                name: str = Field(..., description="The person's name")
+                age: int = Field(..., description="The person's age")
+                fav_food: Optional[str] = Field(None, description="The person's favorite food")
+
+
+            class RecordDog(BaseModel):
+                '''Record some identifying information about a dog.'''
+
+                name: str = Field(..., description="The dog's name")
+                color: str = Field(..., description="The dog's color")
+                fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+
+            llm = ChatOpenAI(model="gpt-4", temperature=0)
+            prompt = ChatPromptTemplate.from_messages(
+                [
+                    ("system", "You are a world class algorithm for recording entities."),
+                    ("human", "Make calls to the relevant function to record the entities in the following input: {input}"),
+                    ("human", "Tip: Make sure to answer in the correct format"),
+                ]
+            )
+            chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt)
+            chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+            # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
+    """  # noqa: E501
+    if not functions:
+        raise ValueError("Need to pass in at least one function. Received zero.")
+    openai_functions = [convert_to_openai_function(f) for f in functions]
+    llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs}
+    if len(openai_functions) == 1 and enforce_single_function_usage:
+        llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
+    output_parser = output_parser or get_openai_output_parser(functions)
+    return prompt | llm.bind(**llm_kwargs) | output_parser
+
+
+# TODO: implement mode='openai-tools'.
+def create_structured_output_runnable(
+    output_schema: Union[Dict[str, Any], Type[BaseModel]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+    mode: Literal["openai-functions", "openai-json"] = "openai-functions",
+    enforce_single_function_usage: bool = True,
+    **kwargs: Any,
+) -> Runnable:
+    """Create a runnable for extracting structured outputs.
+
+    Args:
+        output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
+            is passed in, it's assumed to already be a valid JsonSchema.
+            For best results, pydantic.BaseModels should have docstrings describing what
+            the schema represents and descriptions for the parameters.
+        llm: Language model to use. Assumed to support the OpenAI function-calling API
+            if mode is 'openai-function'. Assumed to support OpenAI response_format
+            parameter if mode is 'openai-json'.
+        prompt: BasePromptTemplate to pass to the model. If mode is 'openai-json' and
+            prompt has input variable 'output_schema' then the given output_schema
+            will be converted to a JsonSchema and inserted in the prompt.
+        output_parser: Output parser to use for parsing model outputs. By default
+            will be inferred from the function types. If pydantic.BaseModel is passed
+            in, then the OutputParser will try to parse outputs using the pydantic
+            class. Otherwise model outputs will be parsed as JSON.
+        mode: How structured outputs are extracted from the model. If 'openai-functions'
+            then OpenAI function calling is used. If 'openai-json' then OpenAI model
+            with response_format set to JSON is used.
+        enforce_single_function_usage: Only used if mode is 'openai-functions'. Only
+            used if a single function is passed in. If
+            True, then the model will be forced to use the given function. If False,
+            then the model will be given the option to use the given function or not.
+        **kwargs: Additional named arguments.
+
+    Returns:
+        A runnable sequence that will return a structured output matching the given
+        output_schema.
+
+    OpenAI functions example:
+        .. code-block:: python
+
+            from typing import Optional
+
+            from langchain.chains.structured_output import create_structured_output_runnable
+            from langchain_openai import ChatOpenAI
+            from langchain_core.prompts import ChatPromptTemplate
+            from langchain_core.pydantic_v1 import BaseModel, Field
+
+            class Dog(BaseModel):
+                '''Identifying information about a dog.'''
+
+                name: str = Field(..., description="The dog's name")
+                color: str = Field(..., description="The dog's color")
+                fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+            llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
+            prompt = ChatPromptTemplate.from_messages(
+                [
+                    ("system", "You are a world class algorithm for extracting information in structured formats."),
+                    ("human", "Use the given format to extract information from the following input: {input}"),
+                    ("human", "Tip: Make sure to answer in the correct format"),
+                ]
+            )
+            chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-functions")
+            chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+            # -> Dog(name="Harry", color="brown", fav_food="chicken")
+
+    OpenAI json response format example:
+        .. code-block:: python
+
+            from typing import Optional
+
+            from langchain.chains.structured_output import create_structured_output_runnable
+            from langchain_openai import ChatOpenAI
+            from langchain_core.prompts import ChatPromptTemplate
+            from langchain_core.pydantic_v1 import BaseModel, Field
+
+            class Dog(BaseModel):
+                '''Identifying information about a dog.'''
+
+                name: str = Field(..., description="The dog's name")
+                color: str = Field(..., description="The dog's color")
+                fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+            llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
+            system = '''You are a world class assistant for extracting information in structured JSON formats. \
+
+            Extract a valid JSON blob from the user input that matches the following JSON Schema:
+
+            {output_schema}'''
+            prompt = ChatPromptTemplate.from_messages(
+                [
+                    ("system", system),
+                    ("human", "{input}"),
+                ]
+            )
+            chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-json")
+            chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+    """  # noqa: E501
+    if mode == "openai-functions":
+        return _create_openai_functions_structured_output_runnable(
+            output_schema,
+            llm,
+            prompt,
+            output_parser=output_parser,
+            enforce_single_function_usage=enforce_single_function_usage,
+            **kwargs,
+        )
+    elif mode == "openai-json":
+        return _create_openai_json_runnable(
+            output_schema, llm, prompt, output_parser=output_parser, **kwargs
+        )
+    else:
+        raise ValueError(
+            f"Invalid mode {mode}. Expected one of 'openai-functions', "
+            f"'openai-json'."
+        )
+
+
+def get_openai_output_parser(
+    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
+) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
+    """Get the appropriate function output parser given the user functions.
+
+    Args:
+        functions: Sequence where element is a dictionary, a pydantic.BaseModel class,
+            or a Python function. If a dictionary is passed in, it is assumed to
+            already be a valid OpenAI function.
+
+    Returns:
+        A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise
+        a JsonOutputFunctionsParser. If there's only one function and it is
+        not a Pydantic class, then the output parser will automatically extract
+        only the function arguments and not the function name.
+    """
+    function_names = [convert_to_openai_function(f)["name"] for f in functions]
+    if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
+        if len(functions) > 1:
+            pydantic_schema: Union[Dict, Type[BaseModel]] = {
+                name: fn for name, fn in zip(function_names, functions)
+            }
+        else:
+            pydantic_schema = functions[0]
+        output_parser: Union[
+            BaseOutputParser, BaseGenerationOutputParser
+        ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
+    else:
+        output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
+    return output_parser
+
+
+def _create_openai_json_runnable(
+    output_schema: Union[Dict[str, Any], Type[BaseModel]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+) -> Runnable:
+    """"""
+    if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
+        output_parser = output_parser or PydanticOutputParser(
+            pydantic_object=output_schema,
+        )
+        schema_as_dict = convert_to_openai_function(output_schema)["parameters"]
+    else:
+        output_parser = output_parser or JsonOutputParser()
+        schema_as_dict = output_schema
+
+    if "output_schema" in prompt.input_variables:
+        prompt = prompt.partial(output_schema=json.dumps(schema_as_dict, indent=2))
+
+    llm = llm.bind(response_format={"type": "json_object"})
+    return prompt | llm | output_parser
+
+
+def _create_openai_functions_structured_output_runnable(
+    output_schema: Union[Dict[str, Any], Type[BaseModel]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+    **kwargs: Any,
+) -> Runnable:
+    if isinstance(output_schema, dict):
+        function: Any = {
+            "name": "output_formatter",
+            "description": (
+                "Output formatter. Should always be used to format your response to the"
+                " user."
+            ),
+            "parameters": output_schema,
+        }
+    else:
+
+        class _OutputFormatter(BaseModel):
+            """Output formatter. Should always be used to format your response to the user."""  # noqa: E501
+
+            output: output_schema  # type: ignore
+
+        function = _OutputFormatter
+        output_parser = output_parser or PydanticAttrOutputFunctionsParser(
+            pydantic_schema=_OutputFormatter, attr_name="output"
+        )
+    return create_openai_fn_runnable(
+        [function],
+        llm,
+        prompt,
+        output_parser=output_parser,
+        **kwargs,
+    )
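For readers skimming `_create_openai_json_runnable`, here is the same pipeline spelled out by hand (hypothetical prompt and schema; requires the `langchain-openai` package):

```python
import json

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

schema = {"type": "object", "properties": {"name": {"type": "string"}}}

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Reply with a JSON object matching this schema:\n{output_schema}"),
        ("human", "{input}"),
    ]
).partial(output_schema=json.dumps(schema, indent=2))

# Binding response_format switches the model into OpenAI's JSON mode, which
# is what the 'openai-json' branch does internally.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0).bind(
    response_format={"type": "json_object"}
)
chain = prompt | llm | JsonOutputParser()
# chain.invoke({"input": "The dog's name is Harry"})  # -> {"name": "Harry"}
```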
@@ -1,42 +1,32 @@
 import json
-import re
-from typing import Type, TypeVar
+from typing import Any, List, Type
 
 from langchain_core.exceptions import OutputParserException
-from langchain_core.output_parsers import BaseOutputParser
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.outputs import Generation
 from langchain_core.pydantic_v1 import BaseModel, ValidationError
 
 from langchain.output_parsers.format_instructions import PYDANTIC_FORMAT_INSTRUCTIONS
 
-T = TypeVar("T", bound=BaseModel)
-
 
-class PydanticOutputParser(BaseOutputParser[T]):
+class PydanticOutputParser(JsonOutputParser):
     """Parse an output using a pydantic model."""
 
-    pydantic_object: Type[T]
+    pydantic_object: Type[BaseModel]
     """The pydantic model to parse.
 
     Attention: To avoid potential compatibility issues, it's recommended to use
     pydantic <2 or leverage the v1 namespace in pydantic >= 2.
     """
 
-    def parse(self, text: str) -> T:
+    def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
+        json_object = super().parse_result(result)
         try:
-            # Greedy search for 1st json candidate.
-            match = re.search(
-                r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
-            )
-            json_str = ""
-            if match:
-                json_str = match.group()
-            json_object = json.loads(json_str, strict=False)
             return self.pydantic_object.parse_obj(json_object)
-        except (json.JSONDecodeError, ValidationError) as e:
+        except ValidationError as e:
             name = self.pydantic_object.__name__
-            msg = f"Failed to parse {name} from completion {text}. Got: {e}"
-            raise OutputParserException(msg, llm_output=text)
+            msg = f"Failed to parse {name} from completion {json_object}. Got: {e}"
+            raise OutputParserException(msg, llm_output=json_object)
 
     def get_format_instructions(self) -> str:
         schema = self.pydantic_object.schema()
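Net effect of the refactor: `PydanticOutputParser` now inherits markdown-fence handling and the streaming-friendly `parse_result` from `JsonOutputParser`, then validates with the pydantic model. A small usage sketch:

```python
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel

class Person(BaseModel):
    name: str
    age: int

parser = PydanticOutputParser(pydantic_object=Person)
# Fenced output is handled by the inherited JsonOutputParser logic
# before pydantic validation runs:
print(parser.parse('```json\n{"name": "Ada", "age": 36}\n```'))  # name='Ada' age=36
```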
@@ -57,6 +47,6 @@ class PydanticOutputParser(BaseOutputParser[T]):
         return "pydantic"
 
     @property
-    def OutputType(self) -> Type[T]:
+    def OutputType(self) -> Type[BaseModel]:
         """Return the pydantic model."""
         return self.pydantic_object
@@ -53,7 +53,7 @@ DEF_EXPECTED_RESULT = TestModel(
 def test_pydantic_output_parser() -> None:
     """Test PydanticOutputParser."""
 
-    pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser(
+    pydantic_parser: PydanticOutputParser = PydanticOutputParser(
         pydantic_object=TestModel
     )
 
@@ -65,7 +65,7 @@ def test_pydantic_output_parser() -> None:
 def test_pydantic_output_parser_fail() -> None:
     """Test PydanticOutputParser where completion result fails schema validation."""
 
-    pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser(
+    pydantic_parser: PydanticOutputParser = PydanticOutputParser(
         pydantic_object=TestModel
     )
 