OpenAI runnable constructor (#12455)

2025-09-13 21:47:12 +00:00 · 2023-10-29 13:40:30 -07:00
parent a830b809f3
commit 1815ea2fdb
3 changed files with 368 additions and 134 deletions
--- a/libs/langchain/langchain/chains/openai_functions/__init__.py
+++ b/libs/langchain/langchain/chains/openai_functions/__init__.py
@@ -1,6 +1,10 @@
 from langchain.chains.openai_functions.base import (
+    convert_to_openai_function,
    create_openai_fn_chain,
+    create_openai_fn_runnable,
    create_structured_output_chain,
+    create_structured_output_runnable,
+    get_openai_output_parser,
 )
 from langchain.chains.openai_functions.citation_fuzzy_match import (
    create_citation_fuzzy_match_chain,
@@ -19,6 +23,7 @@ from langchain.chains.openai_functions.tagging import (
 )

 __all__ = [
+    "convert_to_openai_function",
    "create_tagging_chain",
    "create_tagging_chain_pydantic",
    "create_extraction_chain_pydantic",
@@ -28,4 +33,7 @@ __all__ = [
    "create_qa_with_sources_chain",
    "create_structured_output_chain",
    "create_openai_fn_chain",
+    "create_structured_output_runnable",
+    "create_openai_fn_runnable",
+    "get_openai_output_parser",
 ]
--- a/libs/langchain/langchain/chains/openai_functions/base.py
+++ b/libs/langchain/langchain/chains/openai_functions/base.py
@@ -23,6 +23,8 @@ from langchain.output_parsers.openai_functions import (
 from langchain.prompts import BasePromptTemplate
 from langchain.pydantic_v1 import BaseModel
 from langchain.schema import BaseLLMOutputParser
+from langchain.schema.output_parser import BaseGenerationOutputParser, BaseOutputParser
+from langchain.schema.runnable import Runnable
 from langchain.utils.openai_functions import convert_pydantic_to_openai_function

 PYTHON_TO_JSON_TYPES = {
@@ -161,11 +163,23 @@ def convert_to_openai_function(
        )


-def _get_openai_output_parser(
+def get_openai_output_parser(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
-    function_names: Sequence[str],
-) -> BaseLLMOutputParser:
-    """Get the appropriate function output parser given the user functions."""
+) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
+    """Get the appropriate function output parser given the user functions.
+
+    Args:
+        functions: Sequence where each element is a dictionary, a
+            pydantic.BaseModel class, or a Python function. If a dictionary is
+            passed in, it is assumed to already be a valid OpenAI function.
+
+    Returns:
+        A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise
+            a JsonOutputFunctionsParser. If there's only one function and it is
+            not a Pydantic class, then the output parser will automatically extract
+            only the function arguments and not the function name.
+    """
+    function_names = [convert_to_openai_function(f)["name"] for f in functions]
    if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
        if len(functions) > 1:
            pydantic_schema: Union[Dict, Type[BaseModel]] = {
@@ -173,14 +187,183 @@ def _get_openai_output_parser(
            }
        else:
            pydantic_schema = functions[0]
-        output_parser: BaseLLMOutputParser = PydanticOutputFunctionsParser(
-            pydantic_schema=pydantic_schema
-        )
+        output_parser: Union[
+            BaseOutputParser, BaseGenerationOutputParser
+        ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
    else:
        output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
    return output_parser


+def create_openai_fn_runnable(
+    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+    **kwargs: Any,
+) -> Runnable:
+    """Create a runnable sequence that uses OpenAI functions.
+
+    Args:
+        functions: A sequence of either dictionaries, pydantic.BaseModels classes, or
+            Python functions. If dictionaries are passed in, they are assumed to
+            already be valid OpenAI functions. If only a single
+            function is passed in, then it will be enforced that the model use that
+            function. pydantic.BaseModels and Python functions should have docstrings
+            describing what the function does. For best results, pydantic.BaseModels
+            should have descriptions of the parameters and Python functions should have
+            Google Python style args descriptions in the docstring. Additionally,
+            Python functions should only use primitive types (str, int, float, bool) or
+            pydantic.BaseModels for arguments.
+        llm: Language model to use, assumed to support the OpenAI function-calling API.
+        prompt: BasePromptTemplate to pass to the model.
+        output_parser: Output parser to use for parsing model outputs. By default
+            will be inferred from the function types. If pydantic.BaseModels are passed
+            in, then the OutputParser will try to parse outputs using those. Otherwise
+            model outputs will simply be parsed as JSON. If multiple functions are
+            passed in and they are not pydantic.BaseModels, the chain output will
+            include both the name of the function that was returned and the arguments
+            to pass to the function.
+
+    Returns:
+        A runnable sequence that will pass in the given functions to the model when run.
+
+    Example:
+        .. code-block:: python
+
+                from typing import Optional
+
+                from langchain.chains.openai_functions import create_openai_fn_runnable
+                from langchain.chat_models import ChatOpenAI
+                from langchain.prompts import ChatPromptTemplate
+                from langchain.pydantic_v1 import BaseModel, Field
+
+
+                class RecordPerson(BaseModel):
+                    \"\"\"Record some identifying information about a person.\"\"\"
+
+                    name: str = Field(..., description="The person's name")
+                    age: int = Field(..., description="The person's age")
+                    fav_food: Optional[str] = Field(None, description="The person's favorite food")
+
+
+                class RecordDog(BaseModel):
+                    \"\"\"Record some identifying information about a dog.\"\"\"
+
+                    name: str = Field(..., description="The dog's name")
+                    color: str = Field(..., description="The dog's color")
+                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+
+                llm = ChatOpenAI(model="gpt-4", temperature=0)
+                prompt = ChatPromptTemplate.from_messages(
+                    [
+                        ("system", "You are a world class algorithm for recording entities."),
+                        ("human", "Make calls to the relevant function to record the entities in the following input: {input}"),
+                        ("human", "Tip: Make sure to answer in the correct format"),
+                    ]
+                )
+                chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt)
+                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+                # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
+    """  # noqa: E501
+    if not functions:
+        raise ValueError("Need to pass in at least one function. Received zero.")
+    openai_functions = [convert_to_openai_function(f) for f in functions]
+    llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs}
+    if len(openai_functions) == 1:
+        llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
+    output_parser = output_parser or get_openai_output_parser(functions)
+    return prompt | llm.bind(**llm_kwargs) | output_parser
+
+
+def create_structured_output_runnable(
+    output_schema: Union[Dict[str, Any], Type[BaseModel]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+    **kwargs: Any,
+) -> Runnable:
+    """Create a runnable that uses an OpenAI function to get a structured output.
+
+    Args:
+        output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
+            is passed in, it's assumed to already be a valid JsonSchema.
+            For best results, pydantic.BaseModels should have docstrings describing what
+            the schema represents and descriptions for the parameters.
+        llm: Language model to use, assumed to support the OpenAI function-calling API.
+        prompt: BasePromptTemplate to pass to the model.
+        output_parser: Output parser to use for parsing model outputs. By default
+            will be inferred from the function types. If pydantic.BaseModels are passed
+            in, then the OutputParser will try to parse outputs using those. Otherwise
+            model outputs will simply be parsed as JSON.
+
+    Returns:
+        A runnable sequence that will pass the given function to the model when run.
+
+    Example:
+        .. code-block:: python
+
+                from typing import Optional
+
+                from langchain.chains.openai_functions import create_structured_output_runnable
+                from langchain.chat_models import ChatOpenAI
+                from langchain.prompts import ChatPromptTemplate
+                from langchain.pydantic_v1 import BaseModel, Field
+
+                class Dog(BaseModel):
+                    \"\"\"Identifying information about a dog.\"\"\"
+
+                    name: str = Field(..., description="The dog's name")
+                    color: str = Field(..., description="The dog's color")
+                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+                llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0)
+                prompt = ChatPromptTemplate.from_messages(
+                    [
+                        ("system", "You are a world class algorithm for extracting information in structured formats."),
+                        ("human", "Use the given format to extract information from the following input: {input}"),
+                        ("human", "Tip: Make sure to answer in the correct format"),
+                    ]
+                )
+                chain = create_structured_output_runnable(Dog, llm, prompt)
+                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+                # -> Dog(name="Harry", color="brown", fav_food="chicken")
+    """  # noqa: E501
+    if isinstance(output_schema, dict):
+        function: Any = {
+            "name": "output_formatter",
+            "description": (
+                "Output formatter. Should always be used to format your response to the"
+                " user."
+            ),
+            "parameters": output_schema,
+        }
+    else:
+
+        class _OutputFormatter(BaseModel):
+            """Output formatter. Should always be used to format your response to the user."""  # noqa: E501
+
+            output: output_schema  # type: ignore
+
+        function = _OutputFormatter
+        output_parser = output_parser or PydanticAttrOutputFunctionsParser(
+            pydantic_schema=_OutputFormatter, attr_name="output"
+        )
+    return create_openai_fn_runnable(
+        [function],
+        llm,
+        prompt,
+        output_parser=output_parser,
+        **kwargs,
+    )
+
+
+""" --- Legacy --- """
+
+
 def create_openai_fn_chain(
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
    llm: BaseLanguageModel,
@@ -190,7 +373,7 @@ def create_openai_fn_chain(
    output_parser: Optional[BaseLLMOutputParser] = None,
    **kwargs: Any,
 ) -> LLMChain:
-    """Create an LLM chain that uses OpenAI functions.
+    """[Legacy] Create an LLM chain that uses OpenAI functions.

    Args:
        functions: A sequence of either dictionaries, pydantic.BaseModels classes, or
@@ -260,8 +443,7 @@ def create_openai_fn_chain(
    if not functions:
        raise ValueError("Need to pass in at least one function. Received zero.")
    openai_functions = [convert_to_openai_function(f) for f in functions]
-    fn_names = [oai_fn["name"] for oai_fn in openai_functions]
-    output_parser = output_parser or _get_openai_output_parser(functions, fn_names)
+    output_parser = output_parser or get_openai_output_parser(functions)
    llm_kwargs: Dict[str, Any] = {
        "functions": openai_functions,
    }
@@ -287,7 +469,7 @@ def create_structured_output_chain(
    output_parser: Optional[BaseLLMOutputParser] = None,
    **kwargs: Any,
 ) -> LLMChain:
-    """Create an LLMChain that uses an OpenAI function to get a structured output.
+    """[Legacy] Create an LLMChain that uses an OpenAI function to get a structured output.

    Args:
        output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary