openai[minor]: implement langchain-openai package (#15503)

Todo

- [x] copy over integration tests
- [x] update docs with new instructions in #15513
- [x] add linear ticket to bump core -> community, community -> langchain, and core -> openai deps
- [ ] (optional): add `pip install langchain-openai` command to each notebook using it
- [x] Update docstrings to not need `openai` install
- [x] Add serialization
- [x] deprecate old models

Contributor steps:

- [x] Add secret names to manual integrations workflow in .github/workflows/_integration_test.yml
- [x] Add secrets to release workflow (for pre-release testing) in .github/workflows/_release.yml

Maintainer steps (Contributors should not do these):

- [x] set up pypi and test pypi projects
- [x] add credential secrets to Github Actions
- [ ] add package to conda-forge

Functional changes to existing classes:

- now relies on openai client v1 (1.6.1) via concrete dep in langchain-openai package

Codebase organization:

- some function calling stuff moved to `langchain_core.utils.function_calling` in order to be used in both community and langchain-openai
2025-09-07 22:11:51 +00:00 · 2024-01-05 15:03:28 -08:00
parent a7d023aaf0
commit ebc75c5ca7
64 changed files with 5997 additions and 387 deletions
--- a/libs/core/langchain_core/load/mapping.py
+++ b/libs/core/langchain_core/load/mapping.py
@@ -90,9 +90,9 @@ SERIALIZABLE_MAPPING = {
        "MessagesPlaceholder",
    ),
    ("langchain", "llms", "openai", "OpenAI"): (
-        "langchain",
+        "langchain_openai",
        "llms",
-        "openai",
+        "base",
        "OpenAI",
    ),
    ("langchain", "prompts", "chat", "ChatPromptTemplate"): (
@@ -203,9 +203,9 @@ SERIALIZABLE_MAPPING = {
        "StrOutputParser",
    ),
    ("langchain", "chat_models", "openai", "ChatOpenAI"): (
-        "langchain",
+        "langchain_openai",
        "chat_models",
-        "openai",
+        "base",
        "ChatOpenAI",
    ),
    ("langchain", "output_parsers", "list", "CommaSeparatedListOutputParser"): (
@@ -221,9 +221,9 @@ SERIALIZABLE_MAPPING = {
        "RunnableParallel",
    ),
    ("langchain", "chat_models", "azure_openai", "AzureChatOpenAI"): (
-        "langchain",
+        "langchain_openai",
        "chat_models",
-        "azure_openai",
+        "azure",
        "AzureChatOpenAI",
    ),
    ("langchain", "chat_models", "bedrock", "BedrockChat"): (
@@ -323,9 +323,9 @@ SERIALIZABLE_MAPPING = {
        "GooglePalm",
    ),
    ("langchain", "llms", "openai", "AzureOpenAI"): (
-        "langchain",
+        "langchain_openai",
        "llms",
-        "openai",
+        "azure",
        "AzureOpenAI",
    ),
    ("langchain", "llms", "replicate", "Replicate"): (
--- a/libs/core/langchain_core/utils/function_calling.py
+++ b/libs/core/langchain_core/utils/function_calling.py
@@ -0,0 +1,202 @@
+"""Methods for creating function specs in the style of OpenAI Functions"""
+
+import inspect
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+    cast,
+)
+
+from typing_extensions import TypedDict
+
+from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.utils.json_schema import dereference_refs
+
+# Maps the ``__name__`` of a Python primitive type to the JSON Schema type name
+# used for it. Note that ``int`` maps to "number", the same as ``float``.
+PYTHON_TO_JSON_TYPES: Dict[str, str] = {
+    "str": "string",
+    "int": "number",
+    "float": "number",
+    "bool": "boolean",
+}
+
+
+class FunctionDescription(TypedDict):
+    """Representation of a callable function to the OpenAI API.
+
+    A typed dictionary with three keys: ``name``, ``description`` and
+    ``parameters``.
+    """
+
+    name: str
+    """The name of the function."""
+    description: str
+    """A description of the function."""
+    parameters: dict
+    """The parameters of the function."""
+
+
+class ToolDescription(TypedDict):
+    """Representation of a tool to the OpenAI API.
+
+    Wraps a ``FunctionDescription`` under the ``function`` key; ``type`` only
+    admits the literal string ``"function"``.
+    """
+
+    # Discriminator for the kind of tool; the only permitted value is "function".
+    type: Literal["function"]
+    # Description of the function this tool exposes.
+    function: FunctionDescription
+
+
+def convert_pydantic_to_openai_function(
+    model: Type[BaseModel],
+    *,
+    name: Optional[str] = None,
+    description: Optional[str] = None,
+) -> FunctionDescription:
+    """Converts a Pydantic model to a function description for the OpenAI API.
+
+    Args:
+        model: The Pydantic model class whose JSON schema becomes the function's
+            ``parameters``.
+        name: Function name to use. Falls back to the schema's ``title``.
+        description: Function description to use. Falls back to the schema's
+            ``description`` or, when the schema has none, to an empty string.
+
+    Returns:
+        A dict with ``name``, ``description`` and ``parameters`` keys.
+    """
+    schema = dereference_refs(model.schema())
+    # NOTE(review): presumably ``dereference_refs`` has inlined every ``$ref``,
+    # which would make the shared "definitions" section redundant - confirm.
+    schema.pop("definitions", None)
+    return {
+        "name": name or schema["title"],
+        # The schema is not guaranteed to carry a "description" key (presumably
+        # absent when the model has no docstring), so never index it directly.
+        "description": description or schema.get("description", ""),
+        "parameters": schema,
+    }
+
+
+def convert_pydantic_to_openai_tool(
+    model: Type[BaseModel],
+    *,
+    name: Optional[str] = None,
+    description: Optional[str] = None,
+) -> ToolDescription:
+    """Converts a Pydantic model to a tool description for the OpenAI API.
+
+    The model is first converted with ``convert_pydantic_to_openai_function``
+    (``name`` and ``description`` are forwarded unchanged) and the result is
+    placed under the ``function`` key of a dict whose ``type`` is ``"function"``.
+    """
+    return {
+        "type": "function",
+        "function": convert_pydantic_to_openai_function(
+            model, name=name, description=description
+        ),
+    }
+
+
+def _get_python_function_name(function: Callable) -> str:
+    """Return the ``__name__`` attribute of ``function``."""
+    name = function.__name__
+    return name
+
+
+def _parse_python_function_docstring(function: Callable) -> Tuple[str, dict]:
+    """Extract the description and per-argument descriptions from a docstring.
+
+    Assumes the function docstring follows Google Python style guide.
+
+    The docstring is split into blocks on blank lines. Blocks that precede any
+    ``Returns:`` / ``Example:`` block are joined with spaces to form the
+    description; the first block starting with ``Args:`` supplies the argument
+    descriptions and ends the scan.
+
+    Returns:
+        A ``(description, arg_descriptions)`` tuple. Both are empty when the
+        function has no docstring.
+    """
+    doc = inspect.getdoc(function)
+    if not doc:
+        return "", {}
+
+    summary_parts = []
+    args_section = None
+    in_summary = True
+    for section in doc.split("\n\n"):
+        if section.startswith("Args:"):
+            args_section = section
+            break
+        if section.startswith(("Returns:", "Example:")):
+            # Keep scanning: an Args block may still follow.
+            in_summary = False
+        elif in_summary:
+            summary_parts.append(section)
+    summary = " ".join(summary_parts)
+
+    arg_descriptions = {}
+    if not args_section:
+        return summary, arg_descriptions
+
+    current = None
+    # The first line is the "Args:" header itself.
+    for raw_line in args_section.split("\n")[1:]:
+        if ":" in raw_line:
+            current, text = raw_line.split(":", maxsplit=1)
+            arg_descriptions[current.strip()] = text.strip()
+        elif current:
+            # A line without a colon continues the previous argument's text.
+            arg_descriptions[current.strip()] += " " + raw_line.strip()
+    return summary, arg_descriptions
+
+
+def _get_python_function_arguments(function: Callable, arg_descriptions: dict) -> dict:
+    """Get JsonSchema describing a Python functions arguments.
+
+    Assumes all function arguments are of primitive types (int, float, str, bool) or
+    are subclasses of pydantic.BaseModel. An annotation of any other kind gets no
+    ``type`` entry; such an argument appears in the result only when it has a
+    description.
+
+    Args:
+        function: The callable whose annotated arguments are described.
+        arg_descriptions: Mapping of argument name to description text.
+
+    Returns:
+        Mapping of argument name to its JSON schema fragment.
+    """
+    properties = {}
+    annotations = inspect.getfullargspec(function).annotations
+    for arg, arg_type in annotations.items():
+        if arg == "return":
+            continue
+        if isinstance(arg_type, type) and issubclass(arg_type, BaseModel):
+            # Mypy error:
+            # "type" has no attribute "schema"
+            properties[arg] = arg_type.schema()  # type: ignore[attr-defined]
+        else:
+            # Not every annotation object has a ``__name__`` (string annotations,
+            # for example), so look it up defensively rather than letting an
+            # AttributeError abort the whole conversion.
+            type_name = getattr(arg_type, "__name__", None)
+            if type_name in PYTHON_TO_JSON_TYPES:
+                properties[arg] = {"type": PYTHON_TO_JSON_TYPES[type_name]}
+        if arg in arg_descriptions:
+            properties.setdefault(arg, {})["description"] = arg_descriptions[arg]
+    return properties
+
+
+def _get_python_function_required_args(function: Callable) -> List[str]:
+    """Get the required arguments for a Python function.
+
+    An argument is required when it has no default value: positional arguments
+    not covered by ``defaults`` plus keyword-only arguments missing from
+    ``kwonlydefaults``.
+
+    Args:
+        function: A function or a class. For a class, a leading ``self`` is
+            dropped from the result.
+
+    Returns:
+        The names of the required arguments, positional ones first.
+    """
+    spec = inspect.getfullargspec(function)
+    # Defaults line up with the tail of ``spec.args``; everything before that is
+    # required. Slicing also yields a fresh list, leaving ``spec.args`` untouched.
+    n_defaults = len(spec.defaults) if spec.defaults else 0
+    required = spec.args[: len(spec.args) - n_defaults]
+    kwonly_defaults = spec.kwonlydefaults or {}
+    required.extend(k for k in spec.kwonlyargs if k not in kwonly_defaults)
+
+    # ``isinstance`` also recognises classes built by a custom metaclass, and
+    # ``required`` may be empty (a class without required constructor arguments),
+    # so check it before indexing.
+    if isinstance(function, type) and required and required[0] == "self":
+        required = required[1:]
+    return required
+
+
+def convert_python_function_to_openai_function(
+    function: Callable,
+) -> Dict[str, Any]:
+    """Build an OpenAI function-calling API compatible dict from a Python function.
+
+    Assumes the Python function has type hints and a docstring with a description.
+    If the docstring has Google Python style argument descriptions, these will be
+    included as well.
+
+    Returns:
+        A dict with ``name``, ``description`` and ``parameters`` keys, where
+        ``parameters`` is an ``"object"`` schema listing the argument properties
+        and the required argument names.
+    """
+    description, arg_descriptions = _parse_python_function_docstring(function)
+    function_name = _get_python_function_name(function)
+    parameters = {
+        "type": "object",
+        "properties": _get_python_function_arguments(function, arg_descriptions),
+        "required": _get_python_function_required_args(function),
+    }
+    return {
+        "name": function_name,
+        "description": description,
+        "parameters": parameters,
+    }
+
+
+def convert_to_openai_function(
+    function: Union[Dict[str, Any], Type[BaseModel], Callable],
+) -> Dict[str, Any]:
+    """Normalize a dict, pydantic model class or callable into an OpenAI function.
+
+    Args:
+        function: Either a dictionary, a pydantic.BaseModel class, or a Python function.
+            If a dictionary is passed in, it is assumed to already be a valid OpenAI
+            function and is returned unchanged.
+
+    Returns:
+        A dict version of the passed in function which is compatible with the
+            OpenAI function-calling API.
+
+    Raises:
+        ValueError: If ``function`` is none of the supported kinds.
+    """
+    if isinstance(function, dict):
+        return function
+    # Model classes are callable too, so they must be tested before ``callable``.
+    if isinstance(function, type) and issubclass(function, BaseModel):
+        return cast(Dict, convert_pydantic_to_openai_function(function))
+    if callable(function):
+        return convert_python_function_to_openai_function(function)
+    raise ValueError(
+        f"Unsupported function type {type(function)}. Functions must be passed in"
+        f" as Dict, pydantic.BaseModel, or Callable."
+    )