diff --git a/docs/docs/how_to/structured_output.ipynb b/docs/docs/how_to/structured_output.ipynb index 316f2d11287..c3ddb593c57 100644 --- a/docs/docs/how_to/structured_output.ipynb +++ b/docs/docs/how_to/structured_output.ipynb @@ -43,7 +43,7 @@ "\n", "This is the easiest and most reliable way to get structured outputs. `with_structured_output()` is implemented for models that provide native APIs for structuring outputs, like tool/function calling or JSON mode, and makes use of these capabilities under the hood.\n", "\n", - "This method takes a schema as input which specifies the names, types, and descriptions of the desired output attributes. The method returns a model-like Runnable, except that instead of outputting strings or Messages it outputs objects corresponding to the given schema. The schema can be specified as a [JSON Schema](https://json-schema.org/) or a Pydantic class. If JSON Schema is used then a dictionary will be returned by the Runnable, and if a Pydantic class is used then Pydantic objects will be returned.\n", + "This method takes a schema as input which specifies the names, types, and descriptions of the desired output attributes. The method returns a model-like Runnable, except that instead of outputting strings or Messages it outputs objects corresponding to the given schema. The schema can be specified as a TypedDict class, a [JSON Schema](https://json-schema.org/), or a Pydantic class. If a TypedDict or JSON Schema is used then a dictionary will be returned by the Runnable, and if a Pydantic class is used then a Pydantic object will be returned.\n", "\n", "As an example, let's get a model to generate a joke and separate the setup from the punchline:\n", "\n", @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "6d55008f", "metadata": {}, "outputs": [], "\n", "from langchain_openai import ChatOpenAI\n", "\n", - "llm = ChatOpenAI(model=\"gpt-4-0125-preview\", temperature=0)" + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)" ] }, { @@ -76,22 +76,24 @@ "id": "a808a401-be1f-49f9-ad13-58dd68f7db5f", "metadata": {}, "source": [ - "If we want the model to return a Pydantic object, we just need to pass in the desired Pydantic class:" + "### Pydantic class\n", + "\n", + "If we want the model to return a Pydantic object, we just need to pass in the desired Pydantic class. The key advantage of using Pydantic is that the model-generated output will be validated. Pydantic will raise an error if any required fields are missing or if any fields are of the wrong type."
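To make the validation concrete, here is a minimal sketch (an illustrative aside, not a cell from this notebook) of the error Pydantic raises when a required field is missing; it reuses the `Joke` schema defined in the next cell:

```python
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field, ValidationError


class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


try:
    # Simulate a model output that omits the required `punchline` field
    Joke(setup="Why was the cat sitting on the computer?")
except ValidationError as e:
    print(e)  # reports that `punchline` is a required field
```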
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "070bf702", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=8)" + "Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=7)" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -102,12 +104,15 @@ "from langchain_core.pydantic_v1 import BaseModel, Field\n", "\n", "\n", + "# Pydantic\n", "class Joke(BaseModel):\n", " \"\"\"Joke to tell user.\"\"\"\n", "\n", " setup: str = Field(description=\"The setup of the joke\")\n", " punchline: str = Field(description=\"The punchline to the joke\")\n", - " rating: Optional[int] = Field(description=\"How funny the joke is, from 1 to 10\")\n", + " rating: Optional[int] = Field(\n", + " default=None, description=\"How funny the joke is, from 1 to 10\"\n", + " )\n", "\n", "\n", "structured_llm = llm.with_structured_output(Joke)\n", @@ -130,12 +135,73 @@ "id": "deddb6d3", "metadata": {}, "source": [ - "We can also pass in a [JSON Schema](https://json-schema.org/) dict if you prefer not to use Pydantic. In this case, the response is also a dict:" + "### TypedDict or JSON Schema\n", + "\n", + "If you don't want to use Pydantic, explicitly don't want validation of the arguments, or want to be able to stream the model outputs, you can define your schema using a TypedDict class. We can optionally use a special `Annotated` syntax supported by LangChain that allows you to specify the default value and description of a field. Note, the default value is *not* filled in automatically if the model doesn't generate it, it is only used in defining the schema that is passed to the model.\n", + "\n", + ":::info Requirements\n", + "\n", + "- Core: `langchain-core>=0.2.26`\n", + "- Typing extensions: It is highly recommended to import `Annotated` and `TypedDict` from `typing_extensions` instead of `typing` to ensure consistent behavior across Python versions.\n", + "\n", + ":::" ] }, { "cell_type": "code", "execution_count": 8, + "id": "70d82891-42e8-424a-919e-07d83bcfec61", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'setup': 'Why was the cat sitting on the computer?',\n", + " 'punchline': 'Because it wanted to keep an eye on the mouse!',\n", + " 'rating': 7}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from typing_extensions import Annotated, TypedDict\n", + "\n", + "\n", + "# TypedDict\n", + "class Joke(TypedDict):\n", + " \"\"\"Joke to tell user.\"\"\"\n", + "\n", + " setup: Annotated[str, ..., \"The setup of the joke\"]\n", + "\n", + " # Alternatively, we could have specified setup as:\n", + "\n", + " # setup: str # no default, no description\n", + " # setup: Annotated[str, ...] 
# no default, no description\n", + " # setup: Annotated[str, \"foo\"] # default, no description\n", + "\n", + " punchline: Annotated[str, ..., \"The punchline of the joke\"]\n", + " rating: Annotated[Optional[int], None, \"How funny the joke is, from 1 to 10\"]\n", + "\n", + "\n", + "structured_llm = llm.with_structured_output(Joke)\n", + "\n", + "structured_llm.invoke(\"Tell me a joke about cats\")" + ] + }, + { + "cell_type": "markdown", + "id": "e4d7b4dc-f617-4ea8-aa58-847c228791b4", + "metadata": {}, + "source": [ + "Equivalently, we can pass in a [JSON Schema](https://json-schema.org/) dict. This requires no imports or classes and makes it very clear exactly how each parameter is documented, at the cost of being a bit more verbose." + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "6700994a", "metadata": {}, "outputs": [ @@ -144,10 +210,10 @@ "text/plain": [ "{'setup': 'Why was the cat sitting on the computer?',\n", " 'punchline': 'Because it wanted to keep an eye on the mouse!',\n", - " 'rating': 8}" + " 'rating': 7}" ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -169,6 +235,7 @@ " \"rating\": {\n", " \"type\": \"integer\",\n", " \"description\": \"How funny the joke is, from 1 to 10\",\n", + " \"default\": None,\n", " },\n", " },\n", " \"required\": [\"setup\", \"punchline\"],\n", @@ -185,7 +252,7 @@ "source": [ "### Choosing between multiple schemas\n", "\n", - "The simplest way to let the model choose from multiple schemas is to create a parent Pydantic class that has a Union-typed attribute:" + "The simplest way to let the model choose from multiple schemas is to create a parent schema that has a Union-typed attribute:" ] }, { @@ -209,6 +276,17 @@ "from typing import Union\n", "\n", "\n", + "# Pydantic\n", + "class Joke(BaseModel):\n", + " \"\"\"Joke to tell user.\"\"\"\n", + "\n", + " setup: str = Field(description=\"The setup of the joke\")\n", + " punchline: str = Field(description=\"The punchline to the joke\")\n", + " rating: Optional[int] = Field(\n", + " default=None, description=\"How funny the joke is, from 1 to 10\"\n", + " )\n", + "\n", + "\n", "class ConversationalResponse(BaseModel):\n", " \"\"\"Respond in a conversational manner. Be kind and helpful.\"\"\"\n", "\n", @@ -260,7 +338,7 @@ "source": [ "### Streaming\n", "\n", - "We can stream outputs from our structured model when the output type is a dict (i.e., when the schema is specified as a JSON Schema dict). \n", + "We can stream outputs from our structured model when the output type is a dict (i.e., when the schema is specified as a TypedDict class or JSON Schema dict). 
\n", "\n", ":::info\n", "\n", @@ -271,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 9, "id": "aff89877-28a3-472f-a1aa-eff893fe7736", "metadata": {}, "outputs": [ @@ -302,12 +380,24 @@ "{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the'}\n", "{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse'}\n", "{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!'}\n", - "{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 8}\n" + "{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 7}\n" ] } ], "source": [ - "structured_llm = llm.with_structured_output(json_schema)\n", + "from typing_extensions import Annotated, TypedDict\n", + "\n", + "\n", + "# TypedDict\n", + "class Joke(TypedDict):\n", + " \"\"\"Joke to tell user.\"\"\"\n", + "\n", + " setup: Annotated[str, ..., \"The setup of the joke\"]\n", + " punchline: Annotated[str, ..., \"The punchline of the joke\"]\n", + " rating: Annotated[Optional[int], None, \"How funny the joke is, from 1 to 10\"]\n", + "\n", + "\n", + "structured_llm = llm.with_structured_output(Joke)\n", "\n", "for chunk in structured_llm.stream(\"Tell me a joke about cats\"):\n", " print(chunk)" @@ -327,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 11, "id": "283ba784-2072-47ee-9b2c-1119e3c69e8e", "metadata": {}, "outputs": [ @@ -335,11 +425,11 @@ "data": { "text/plain": [ "{'setup': 'Woodpecker',\n", - " 'punchline': \"Woodpecker goes 'knock knock', but don't worry, they never expect you to answer the door!\",\n", - " 'rating': 8}" + " 'punchline': \"Woodpecker who? 
Woodpecker who can't find a tree is just a bird with a headache!\",\n", + " 'rating': 7}" ] }, - "execution_count": 47, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -377,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 12, "id": "d7381cb0-b2c3-4302-a319-ed72d0b9e43f", "metadata": {}, "outputs": [ @@ -385,11 +475,11 @@ "data": { "text/plain": [ "{'setup': 'Crocodile',\n", - " 'punchline': \"Crocodile 'see you later', but in a while, it becomes an alligator!\",\n", + " 'punchline': 'Crocodile be seeing you later, alligator!',\n", " 'rating': 7}" ] }, - "execution_count": 46, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -491,23 +581,24 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 15, "id": "df0370e3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=None)" + "{'setup': 'Why was the cat sitting on the computer?',\n", + " 'punchline': 'Because it wanted to keep an eye on the mouse!'}" ] }, - "execution_count": 6, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "structured_llm = llm.with_structured_output(Joke, method=\"json_mode\")\n", + "structured_llm = llm.with_structured_output(None, method=\"json_mode\")\n", "\n", "structured_llm.invoke(\n", " \"Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys\"\n", @@ -526,19 +617,21 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "id": "10ed2842", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_ASK4EmZeZ69Fi3p554Mb4rWy', 'function': {'arguments': '{\"setup\":\"Why was the cat sitting on the computer?\",\"punchline\":\"Because it wanted to keep an eye on the mouse!\"}', 'name': 'Joke'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 107, 'total_tokens': 143}, 'model_name': 'gpt-4-0125-preview', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-6491d35b-9164-4656-b75c-d7882cfb76cb-0', tool_calls=[{'name': 'Joke', 'args': {'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!'}, 'id': 'call_ASK4EmZeZ69Fi3p554Mb4rWy'}], usage_metadata={'input_tokens': 107, 'output_tokens': 36, 'total_tokens': 143}),\n", - " 'parsed': Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=None),\n", + "{'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_f25ZRmh8u5vHlOWfTUw8sJFZ', 'function': {'arguments': '{\"setup\":\"Why was the cat sitting on the computer?\",\"punchline\":\"Because it wanted to keep an eye on the mouse!\",\"rating\":7}', 'name': 'Joke'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 93, 'total_tokens': 126}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-d880d7e2-df08-4e9e-ad92-dfc29f2fd52f-0', tool_calls=[{'name': 'Joke', 'args': {'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 7}, 'id': 'call_f25ZRmh8u5vHlOWfTUw8sJFZ', 'type': 'tool_call'}], 
usage_metadata={'input_tokens': 93, 'output_tokens': 33, 'total_tokens': 126}),\n", + " 'parsed': {'setup': 'Why was the cat sitting on the computer?',\n", + " 'punchline': 'Because it wanted to keep an eye on the mouse!',\n", + " 'rating': 7},\n", " 'parsing_error': None}" ] }, - "execution_count": 5, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -546,9 +639,7 @@ "source": [ "structured_llm = llm.with_structured_output(Joke, include_raw=True)\n", "\n", - "structured_llm.invoke(\n", - " \"Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys\"\n", - ")" + "structured_llm.invoke(\"Tell me a joke about cats\")" ] }, { @@ -824,7 +915,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -838,7 +929,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/docs/docs/how_to/tool_calling.ipynb b/docs/docs/how_to/tool_calling.ipynb index 06e375967c1..5e38d15fbac 100644 --- a/docs/docs/how_to/tool_calling.ipynb +++ b/docs/docs/how_to/tool_calling.ipynb @@ -24,10 +24,9 @@ "This guide assumes familiarity with the following concepts:\n", "\n", "- [Chat models](/docs/concepts/#chat-models)\n", - "- [LangChain Tools](/docs/concepts/#tools)\n", "- [Tool calling](/docs/concepts/#functiontool-calling)\n", + "- [Tools](/docs/concepts/#tools)\n", "- [Output parsers](/docs/concepts/#output-parsers)\n", - "\n", ":::\n", "\n", "[Tool calling](/docs/concepts/#functiontool-calling) allows a chat model to respond to a given prompt by \"calling a tool\".\n", @@ -38,15 +37,11 @@ "\n", "![Diagram of calling a tool](/img/tool_call.png)\n", "\n", - "If you want to see how to use the model-generated tool call to actually run a tool function [check out this guide](/docs/how_to/tool_results_pass_to_model/).\n", + "If you want to see how to use the model-generated tool call to actually run a tool [check out this guide](/docs/how_to/tool_results_pass_to_model/).\n", "\n", ":::note Supported models\n", "\n", - "Tool calling is not universal, but is supported by many popular LLM providers, including [Anthropic](/docs/integrations/chat/anthropic/), \n", - "[Cohere](/docs/integrations/chat/cohere/), [Google](/docs/integrations/chat/google_vertex_ai_palm/), \n", - "[Mistral](/docs/integrations/chat/mistralai/), [OpenAI](/docs/integrations/chat/openai/), and even for locally-running models via [Ollama](/docs/integrations/chat/ollama/).\n", - "\n", - "You can find a [list of all models that support tool calling here](/docs/integrations/chat/).\n", + "Tool calling is not universal, but is supported by many popular LLM providers. You can find a [list of all models that support tool calling here](/docs/integrations/chat/).\n", "\n", ":::\n", "\n", @@ -58,14 +53,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Passing tools to chat models\n", + "## Defining tool schemas\n", "\n", - "Chat models that support tool calling features implement a `.bind_tools` method, which \n", - "receives a list of functions, Pydantic models, or LangChain [tool objects](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool) \n", - "and binds them to the chat model in its expected format. 
Subsequent invocations of the \n", "chat model will include tool schemas in its calls to the LLM.\n", + "For a model to be able to call tools, we need to pass in tool schemas that describe what the tool does and what its arguments are. Chat models that support tool calling features implement a `.bind_tools()` method for passing tool schemas to the model. Tool schemas can be passed in as Python functions (with type hints and docstrings), Pydantic models, TypedDict classes, or LangChain [Tool objects](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool). Subsequent invocations of the model will pass in these tool schemas along with the prompt.\n", "\n", - "For example, below we implement simple tools for arithmetic:" + "### Python functions\n", + "Our tool schemas can be Python functions:" ] }, { "cell_type": "code", @@ -74,26 +67,41 @@ "metadata": {}, "outputs": [], "source": [ + "# The function name, type hints, and docstring are all part of the tool\n", + "# schema that's passed to the model. Defining good, descriptive schemas\n", + "# is an extension of prompt engineering and is an important part of\n", + "# getting models to perform well.\n", "def add(a: int, b: int) -> int:\n", - " \"\"\"Adds a and b.\"\"\"\n", + " \"\"\"Add two integers.\n", "\n", + " Args:\n", + " a: First integer\n", + " b: Second integer\n", + " \"\"\"\n", " return a + b\n", "\n", "\n", "def multiply(a: int, b: int) -> int:\n", - " \"\"\"Multiplies a and b.\"\"\"\n", - " return a * b\n", "\n", + " \"\"\"Multiply two integers.\n", "\n", - "\n", - "tools = [add, multiply]" + " Args:\n", + " a: First integer\n", + " b: Second integer\n", + " \"\"\"\n", + " return a * b" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "### LangChain Tool\n", + "\n", "LangChain also implements a `@tool` decorator that allows for further control of the tool schema, such as tool names and argument descriptions. 
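For instance, a minimal sketch (using only `langchain_core.tools.tool`; the printed output shapes are indicative, not exact) of what the decorator produces:

```python
from langchain_core.tools import tool


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers.

    Args:
        a: First integer
        b: Second integer
    """
    return a * b


# The decorator infers the tool's name, description, and argument schema
# from the function's signature and docstring.
print(multiply.name)  # -> multiply
print(multiply.args)  # -> {'a': {..., 'type': 'integer'}, 'b': {..., 'type': 'integer'}}
```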
See the how-to guide [here](/docs/how_to/custom_tools/#creating-tools-from-functions) for details.\n", "\n", - "We can also define the schemas without the accompanying functions using [Pydantic](https://docs.pydantic.dev):" + "### Pydantic class\n", + "\n", + "You can equivalently define the schemas without the accompanying functions using [Pydantic](https://docs.pydantic.dev):" ] }, { @@ -105,23 +113,57 @@ "from langchain_core.pydantic_v1 import BaseModel, Field\n", "\n", "\n", - "# Note that the docstrings here are crucial, as they will be passed along\n", - "# to the model along with the class name.\n", - "class Add(BaseModel):\n", - " \"\"\"Add two integers together.\"\"\"\n", + "class add(BaseModel):\n", + " \"\"\"Add two integers.\"\"\"\n", "\n", " a: int = Field(..., description=\"First integer\")\n", " b: int = Field(..., description=\"Second integer\")\n", "\n", "\n", - "class Multiply(BaseModel):\n", - " \"\"\"Multiply two integers together.\"\"\"\n", + "class multiply(BaseModel):\n", + " \"\"\"Multiply two integers.\"\"\"\n", "\n", " a: int = Field(..., description=\"First integer\")\n", - " b: int = Field(..., description=\"Second integer\")\n", + " b: int = Field(..., description=\"Second integer\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TypedDict class\n", + "\n", + ":::info Requires `langchain-core>=0.2.25`\n", + ":::\n", + "\n", + "Or using TypedDicts and annotations:" ] }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from typing_extensions import Annotated, TypedDict\n", "\n", "\n", - "tools = [Add, Multiply]" + "class add(TypedDict):\n", + " \"\"\"Add two integers.\"\"\"\n", + "\n", + " # Annotations must have the type and can optionally include a default value and description (in that order).\n", + " a: Annotated[int, ..., \"First integer\"]\n", + " b: Annotated[int, ..., \"Second integer\"]\n", + "\n", + "\n", + "class multiply(TypedDict):\n", + " \"\"\"Multiply two integers.\"\"\"\n", + "\n", + " a: Annotated[int, ..., \"First integer\"]\n", + " b: Annotated[int, ..., \"Second integer\"]\n", + "\n", + "\n", + "tools = [add, multiply]" ] }, { @@ -129,7 +171,7 @@ "metadata": {}, "source": [ "To actually bind those schemas to a chat model, we'll use the `.bind_tools()` method. This handles converting\n", - "the `Add` and `Multiply` schemas to the proper format for the model. The tool schema will then be passed it in each time the model is invoked.\n", + "the `add` and `multiply` schemas to the proper format for the model. 
The tool schema will then be passed in each time the model is invoked.\n", "\n", "```{=mdx}\n", "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", @@ -164,16 +206,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_wLTBasMppAwpdiA5CD92l9x7', 'function': {'arguments': '{\"a\":3,\"b\":12}', 'name': 'Multiply'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 89, 'total_tokens': 107}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0f03d4f0ee', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d3f36cca-f225-416f-ac16-0217046f0b38-0', tool_calls=[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_wLTBasMppAwpdiA5CD92l9x7', 'type': 'tool_call'}], usage_metadata={'input_tokens': 89, 'output_tokens': 18, 'total_tokens': 107})" + "AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_BwYJ4UgU5pRVCBOUmiu7NhF9', 'function': {'arguments': '{\"a\":3,\"b\":12}', 'name': 'multiply'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 80, 'total_tokens': 97}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_ba606877f9', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-7f05e19e-4561-40e2-a2d0-8f4e28e9a00f-0', tool_calls=[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_BwYJ4UgU5pRVCBOUmiu7NhF9', 'type': 'tool_call'}], usage_metadata={'input_tokens': 80, 'output_tokens': 17, 'total_tokens': 97})" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -214,23 +256,23 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'name': 'Multiply',\n", + "[{'name': 'multiply',\n", " 'args': {'a': 3, 'b': 12},\n", - " 'id': 'call_uqJsNrDJ8ZZnFa1BHHYAllEv',\n", + " 'id': 'call_rcdMie7E89Xx06lEKKxJyB5N',\n", " 'type': 'tool_call'},\n", - " {'name': 'Add',\n", + " {'name': 'add',\n", " 'args': {'a': 11, 'b': 49},\n", - " 'id': 'call_ud1uHAaYsdpWuxugwoJ63BDs',\n", + " 'id': 'call_nheGN8yfvSJsnIuGZaXihou3',\n", " 'type': 'tool_call'}]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -252,31 +294,49 @@ "are populated in the `.invalid_tool_calls` attribute. An `InvalidToolCall` can have \n", "a name, string arguments, identifier, and error message.\n", "\n", - "If desired, [output parsers](/docs/how_to#output-parsers) can further \n", - "process the output. For example, we can convert existing values populated on the `.tool_calls` attribute back to the original Pydantic class using the\n", + "\n", + "## Parsing\n", + "\n", + "If desired, [output parsers](/docs/how_to#output-parsers) can further process the output. 
For example, we can convert existing values populated in the `.tool_calls` attribute to Pydantic objects using the\n", "[PydanticToolsParser](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.openai_tools.PydanticToolsParser.html):" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Multiply(a=3, b=12), Add(a=11, b=49)]" + "[multiply(a=3, b=12), add(a=11, b=49)]" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from langchain_core.output_parsers import PydanticToolsParser\n", + "from langchain_core.pydantic_v1 import BaseModel, Field\n", "\n", - "chain = llm_with_tools | PydanticToolsParser(tools=[Multiply, Add])\n", + "\n", + "class add(BaseModel):\n", + " \"\"\"Add two integers.\"\"\"\n", + "\n", + " a: int = Field(..., description=\"First integer\")\n", + " b: int = Field(..., description=\"Second integer\")\n", + "\n", + "\n", + "class multiply(BaseModel):\n", + " \"\"\"Multiply two integers.\"\"\"\n", + "\n", + " a: int = Field(..., description=\"First integer\")\n", + " b: int = Field(..., description=\"Second integer\")\n", + "\n", + "\n", + "chain = llm_with_tools | PydanticToolsParser(tools=[add, multiply])\n", "chain.invoke(query)" ] }, @@ -294,18 +354,18 @@ "\n", "You can also check out some more specific uses of tool calling:\n", "\n", + "- Getting [structured outputs](/docs/how_to/structured_output/) from models\n", "- Few shot prompting [with tools](/docs/how_to/tools_few_shot/)\n", "- Stream [tool calls](/docs/how_to/tool_streaming/)\n", - "- Pass [runtime values to tools](/docs/how_to/tool_runtime)\n", - "- Getting [structured outputs](/docs/how_to/structured_output/) from models" + "- Pass [runtime values to tools](/docs/how_to/tool_runtime)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "poetry-venv-311", "language": "python", - "name": "python3" + "name": "poetry-venv-311" }, "language_info": { "codemirror_mode": { @@ -317,7 +377,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index c2485dbe432..bc6630858a3 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -65,7 +65,7 @@ from langchain_core.runnables import RunnableMap, RunnablePassthrough from langchain_core.runnables.config import ensure_config, run_in_executor from langchain_core.tracers._streaming import _StreamingCallbackHandler from langchain_core.utils.function_calling import convert_to_openai_tool -from langchain_core.utils.pydantic import is_basemodel_subclass +from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass if TYPE_CHECKING: from langchain_core.output_parsers.base import OutputParserLike @@ -1074,14 +1074,14 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): def bind_tools( self, - tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], + tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]], **kwargs: Any, ) -> Runnable[LanguageModelInput, BaseMessage]: raise NotImplementedError() def with_structured_output( self, - schema: Union[Dict, Type[BaseModel]], + schema: Union[Dict, Type], *, include_raw: bool 
= False, **kwargs: Any, @@ -1089,13 +1089,25 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): """Model wrapper that returns outputs formatted to match the given schema. Args: - schema: The output schema as a dict or a Pydantic class. If a Pydantic class - then the model output will be an object of that class. If a dict then - the model output will be a dict. With a Pydantic class the returned - attributes will be validated, whereas with a dict they will not be. If - `method` is "function_calling" and `schema` is a dict, then the dict - must match the OpenAI function-calling spec. - include_raw: If False then only the parsed structured output is returned. If + schema: + The output schema. Can be passed in as: + - an OpenAI function/tool schema, + - a JSON Schema, + - a TypedDict class (support added in 0.2.26), + - or a Pydantic class. + If ``schema`` is a Pydantic class then the model output will be a + Pydantic instance of that class, and the model-generated fields will be + validated by the Pydantic class. Otherwise the model output will be a + dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool` + for more on how to properly specify types and descriptions of + schema fields when specifying a Pydantic or TypedDict class. + + .. versionchanged:: 0.2.26 + + Added support for TypedDict class. + + include_raw: + If False then only the parsed structured output is returned. If an error occurs during model output parsing it will be raised. If True then both the raw model response (a BaseMessage) and the parsed model response will be returned. If an error occurs during output parsing it @@ -1103,22 +1115,19 @@ with keys "raw", "parsed", and "parsing_error". Returns: - A Runnable that takes any ChatModel input and returns as output: + A Runnable that takes the same inputs as a :class:`langchain_core.language_models.chat_models.BaseChatModel`. - If include_raw is True then a dict with keys: - raw: BaseMessage - parsed: Optional[_DictOrPydantic] - parsing_error: Optional[BaseException] + If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs + an instance of ``schema`` (i.e., a Pydantic object). - If include_raw is False then just _DictOrPydantic is returned, - where _DictOrPydantic depends on the schema: + Otherwise, if ``include_raw`` is False then Runnable outputs a dict. - If schema is a Pydantic class then _DictOrPydantic is the Pydantic - class. + If ``include_raw`` is True, then Runnable outputs a dict with keys: + - ``"raw"``: BaseMessage + - ``"parsed"``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. + - ``"parsing_error"``: Optional[BaseException] - If schema is a dict then _DictOrPydantic is a dict. - - Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False): + Example: Pydantic schema (include_raw=False): .. code-block:: python from langchain_core.pydantic_v1 import BaseModel class AnswerWithJustification(BaseModel): '''An answer to the user question along with justification for the answer.''' answer: str justification: str llm = ChatModel(model="model-name", temperature=0) structured_llm = llm.with_structured_output(AnswerWithJustification) structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers") # -> AnswerWithJustification( # answer='They weigh the same', # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' # ) @@ -1138,7 +1147,7 @@ - Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True): + Example: Pydantic schema (include_raw=True): .. 
code-block:: python from langchain_core.pydantic_v1 import BaseModel @@ -1158,7 +1167,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): # 'parsing_error': None # } - Example: Function-calling, dict schema (method="function_calling", include_raw=False): + Example: Dict schema (include_raw=False): .. code-block:: python from langchain_core.pydantic_v1 import BaseModel @@ -1194,7 +1203,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): llm = self.bind_tools([schema], tool_choice="any") if isinstance(schema, type) and is_basemodel_subclass(schema): output_parser: OutputParserLike = PydanticToolsParser( - tools=[schema], first_tool_only=True + tools=[cast(TypeBaseModel, schema)], first_tool_only=True ) else: key_name = convert_to_openai_tool(schema)["function"]["name"] diff --git a/libs/core/langchain_core/tools.py b/libs/core/langchain_core/tools.py index 9a28a56ba48..2bcf88404fa 100644 --- a/libs/core/langchain_core/tools.py +++ b/libs/core/langchain_core/tools.py @@ -88,6 +88,7 @@ from langchain_core.runnables.config import ( run_in_executor, ) from langchain_core.runnables.utils import accepts_context +from langchain_core.utils.function_calling import _parse_google_docstring from langchain_core.utils.pydantic import ( TypeBaseModel, _create_subset_model, @@ -140,50 +141,12 @@ def _parse_python_function_docstring( Assumes the function docstring follows Google Python style guide. """ - invalid_docstring_error = ValueError( - f"Found invalid Google-Style docstring for {function}." - ) docstring = inspect.getdoc(function) - if docstring: - docstring_blocks = docstring.split("\n\n") - if error_on_invalid_docstring: - filtered_annotations = { - arg for arg in annotations if arg not in (*(FILTERED_ARGS), "return") - } - if filtered_annotations and ( - len(docstring_blocks) < 2 or not docstring_blocks[1].startswith("Args:") - ): - raise (invalid_docstring_error) - descriptors = [] - args_block = None - past_descriptors = False - for block in docstring_blocks: - if block.startswith("Args:"): - args_block = block - break - elif block.startswith("Returns:") or block.startswith("Example:"): - # Don't break in case Args come after - past_descriptors = True - elif not past_descriptors: - descriptors.append(block) - else: - continue - description = " ".join(descriptors) - else: - if error_on_invalid_docstring: - raise (invalid_docstring_error) - description = "" - args_block = None - arg_descriptions = {} - if args_block: - arg = None - for line in args_block.split("\n")[1:]: - if ":" in line: - arg, desc = line.split(":", maxsplit=1) - arg_descriptions[arg.strip()] = desc.strip() - elif arg: - arg_descriptions[arg.strip()] += " " + line.strip() - return description, arg_descriptions + return _parse_google_docstring( + docstring, + list(annotations), + error_on_invalid_docstring=error_on_invalid_docstring, + ) def _validate_docstring_args_against_annotations( diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index 10a65b2099e..10c2a2609df 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -2,7 +2,10 @@ from __future__ import annotations +import collections +import inspect import logging +import typing import uuid from typing import ( TYPE_CHECKING, @@ -12,27 +15,26 @@ from typing import ( List, Literal, Optional, + Set, + Tuple, Type, Union, cast, ) -from typing_extensions import TypedDict +from typing_extensions import Annotated, TypedDict, 
get_args, get_origin, is_typeddict from langchain_core._api import deprecated -from langchain_core.messages import ( - AIMessage, - BaseMessage, - HumanMessage, - ToolMessage, -) -from langchain_core.pydantic_v1 import BaseModel +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage +from langchain_core.pydantic_v1 import BaseModel, Field, create_model from langchain_core.utils.json_schema import dereference_refs from langchain_core.utils.pydantic import is_basemodel_subclass if TYPE_CHECKING: from langchain_core.tools import BaseTool + logger = logging.getLogger(__name__) + PYTHON_TO_JSON_TYPES = { "str": "string", "int": "integer", @@ -188,6 +190,81 @@ def convert_python_function_to_openai_function( ) +def _convert_typed_dict_to_openai_function(typed_dict: Type) -> FunctionDescription: + visited: Dict = {} + model = cast( + Type[BaseModel], + _convert_any_typed_dicts_to_pydantic(typed_dict, visited=visited), + ) + return convert_pydantic_to_openai_function(model) + + +_MAX_TYPED_DICT_RECURSION = 25 + + +def _convert_any_typed_dicts_to_pydantic( + type_: Type, + *, + visited: Dict, + depth: int = 0, +) -> Type: + if type_ in visited: + return visited[type_] + elif depth >= _MAX_TYPED_DICT_RECURSION: + return type_ + elif is_typeddict(type_): + typed_dict = type_ + docstring = inspect.getdoc(typed_dict) + annotations_ = typed_dict.__annotations__ + description, arg_descriptions = _parse_google_docstring( + docstring, list(annotations_) + ) + fields: dict = {} + for arg, arg_type in annotations_.items(): + if get_origin(arg_type) is Annotated: + annotated_args = get_args(arg_type) + new_arg_type = _convert_any_typed_dicts_to_pydantic( + annotated_args[0], depth=depth + 1, visited=visited + ) + field_kwargs = { + k: v for k, v in zip(("default", "description"), annotated_args[1:]) + } + if (field_desc := field_kwargs.get("description")) and not isinstance( + field_desc, str + ): + raise ValueError( + f"Invalid annotation for field {arg}. Third argument to " + f"Annotated must be a string description, received value of " + f"type {type(field_desc)}." + ) + elif arg_desc := arg_descriptions.get(arg): + field_kwargs["description"] = arg_desc + else: + pass + fields[arg] = (new_arg_type, Field(**field_kwargs)) + else: + new_arg_type = _convert_any_typed_dicts_to_pydantic( + arg_type, depth=depth + 1, visited=visited + ) + field_kwargs = {"default": ...} + if arg_desc := arg_descriptions.get(arg): + field_kwargs["description"] = arg_desc + fields[arg] = (new_arg_type, Field(**field_kwargs)) + model = create_model(typed_dict.__name__, **fields) + model.__doc__ = description + visited[typed_dict] = model + return model + elif (origin := get_origin(type_)) and (type_args := get_args(type_)): + subscriptable_origin = _py_38_safe_origin(origin) + type_args = tuple( + _convert_any_typed_dicts_to_pydantic(arg, depth=depth + 1, visited=visited) + for arg in type_args + ) + return subscriptable_origin[type_args] + else: + return type_ + + @deprecated( "0.1.16", alternative="langchain_core.utils.function_calling.convert_to_openai_function()", @@ -244,22 +321,22 @@ def format_tool_to_openai_tool(tool: BaseTool) -> ToolDescription: def convert_to_openai_function( - function: Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool], + function: Union[Dict[str, Any], Type, Callable, BaseTool], ) -> Dict[str, Any]: """Convert a raw function/class to an OpenAI function. Args: - function: Either a dictionary, a pydantic.BaseModel class, or a Python function. 
- If a dictionary is passed in, it is assumed to already be a valid OpenAI - function or a JSON schema with top-level 'title' and 'description' keys - specified. + function: A dictionary, Pydantic BaseModel class, TypedDict class, a LangChain + Tool object, or a Python function. If a dictionary is passed in, it is + assumed to already be a valid OpenAI function or a JSON schema with + top-level 'title' and 'description' keys specified. Returns: - A dict version of the passed in function which is compatible with the - OpenAI function-calling API. + A dict version of the passed in function which is compatible with the OpenAI + function-calling API. Raises: - ValueError: If the function is not in a supported format. + ValueError: If function is not in a supported format. """ from langchain_core.tools import BaseTool @@ -280,6 +357,8 @@ def convert_to_openai_function( } elif isinstance(function, type) and is_basemodel_subclass(function): return cast(Dict, convert_pydantic_to_openai_function(function)) + elif is_typeddict(function): + return cast(Dict, _convert_typed_dict_to_openai_function(cast(Type, function))) elif isinstance(function, BaseTool): return cast(Dict, format_tool_to_openai_function(function)) elif callable(function): @@ -405,3 +484,69 @@ def tool_example_to_messages( for output, tool_call_dict in zip(tool_outputs, openai_tool_calls): messages.append(ToolMessage(content=output, tool_call_id=tool_call_dict["id"])) # type: ignore return messages + + +def _parse_google_docstring( + docstring: Optional[str], + args: List[str], + *, + error_on_invalid_docstring: bool = False, +) -> Tuple[str, dict]: + """Parse the function and argument descriptions from the docstring of a function. + + Assumes the function docstring follows Google Python style guide. 
+ """ + if docstring: + docstring_blocks = docstring.split("\n\n") + if error_on_invalid_docstring: + filtered_annotations = { + arg for arg in args if arg not in ("run_manager", "callbacks", "return") + } + if filtered_annotations and ( + len(docstring_blocks) < 2 or not docstring_blocks[1].startswith("Args:") + ): + raise ValueError("Found invalid Google-Style docstring.") + descriptors = [] + args_block = None + past_descriptors = False + for block in docstring_blocks: + if block.startswith("Args:"): + args_block = block + break + elif block.startswith("Returns:") or block.startswith("Example:"): + # Don't break in case Args come after + past_descriptors = True + elif not past_descriptors: + descriptors.append(block) + else: + continue + description = " ".join(descriptors) + else: + if error_on_invalid_docstring: + raise ValueError("Found invalid Google-Style docstring.") + description = "" + args_block = None + arg_descriptions = {} + if args_block: + arg = None + for line in args_block.split("\n")[1:]: + if ":" in line: + arg, desc = line.split(":", maxsplit=1) + arg_descriptions[arg.strip()] = desc.strip() + elif arg: + arg_descriptions[arg.strip()] += " " + line.strip() + return description, arg_descriptions + + +def _py_38_safe_origin(origin: Type) -> Type: + origin_map: Dict[Type, Any] = { + dict: Dict, + list: List, + tuple: Tuple, + set: Set, + collections.abc.Iterable: typing.Iterable, + collections.abc.Mapping: typing.Mapping, + collections.abc.Sequence: typing.Sequence, + collections.abc.MutableMapping: typing.MutableMapping, + } + return cast(Type, origin_map.get(origin, origin)) diff --git a/libs/core/langchain_core/utils/pydantic.py b/libs/core/langchain_core/utils/pydantic.py index 8c5dbdbbb11..b49a649a4a4 100644 --- a/libs/core/langchain_core/utils/pydantic.py +++ b/libs/core/langchain_core/utils/pydantic.py @@ -9,10 +9,7 @@ from typing import Any, Callable, Dict, List, Optional, Type, TypeVar, Union import pydantic # pydantic: ignore -from langchain_core.pydantic_v1 import ( - BaseModel, - root_validator, -) +from langchain_core.pydantic_v1 import BaseModel, root_validator def get_pydantic_major_version() -> int: diff --git a/libs/core/poetry.lock b/libs/core/poetry.lock index 5964cbe532d..69fca7d639b 100644 --- a/libs/core/poetry.lock +++ b/libs/core/poetry.lock @@ -3048,4 +3048,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "74434689496616068607618a26e7de242b0bb5c3123463566f4976ab28cc747f" +content-hash = "982544b8c6dee38e7b8e7a124794f731881f38556ed1eb6c0c4f4182f55d2ecb" diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index bea325f96e3..2a06abae489 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -13,7 +13,7 @@ repository = "https://github.com/langchain-ai/langchain" [tool.mypy] disallow_untyped_defs = "True" -exclude = [ "notebooks", "examples", "example_data", "langchain_core/pydantic",] +exclude = [ "notebooks", "examples", "example_data", "langchain_core/pydantic", "tests/unit_tests/utils/test_function_calling.py"] [[tool.mypy.overrides]] module = [ "numpy", "pytest",] ignore_missing_imports = true @@ -29,6 +29,7 @@ tenacity = "^8.1.0,!=8.4.0" jsonpatch = "^1.33" PyYAML = ">=5.3" packaging = ">=23.2,<25" +typing-extensions = ">=4.7" [[tool.poetry.dependencies.pydantic]] version = ">=1,<3" python = "<3.12.4" diff --git a/libs/core/tests/unit_tests/utils/test_function_calling.py 
b/libs/core/tests/unit_tests/utils/test_function_calling.py index 32c6349a5fb..daa981d3143 100644 --- a/libs/core/tests/unit_tests/utils/test_function_calling.py +++ b/libs/core/tests/unit_tests/utils/test_function_calling.py @@ -1,16 +1,44 @@ # mypy: disable-error-code="annotation-unchecked" -from typing import Any, Callable, Dict, List, Literal, Optional, Type +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Literal, + Mapping, + MutableMapping, + MutableSet, + Optional, + Sequence, + Set, + Tuple, + Type, + Union, +) +from typing import TypedDict as TypingTypedDict import pytest from pydantic import BaseModel as BaseModelV2Maybe # pydantic: ignore from pydantic import Field as FieldV2Maybe # pydantic: ignore -from typing_extensions import Annotated, TypedDict +from typing_extensions import ( + Annotated as ExtensionsAnnotated, +) +from typing_extensions import ( + TypedDict as ExtensionsTypedDict, +) + +try: + from typing import Annotated as TypingAnnotated # type: ignore[attr-defined] +except ImportError: + TypingAnnotated = ExtensionsAnnotated from langchain_core.messages import AIMessage, HumanMessage, ToolMessage from langchain_core.pydantic_v1 import BaseModel, Field from langchain_core.runnables import Runnable, RunnableLambda from langchain_core.tools import BaseTool, tool from langchain_core.utils.function_calling import ( + _convert_typed_dict_to_openai_function, convert_to_openai_function, tool_example_to_messages, ) @@ -28,10 +56,10 @@ def pydantic() -> Type[BaseModel]: @pytest.fixture() -def annotated_function() -> Callable: +def Annotated_function() -> Callable: def dummy_function( - arg1: Annotated[int, "foo"], - arg2: Annotated[Literal["bar", "baz"], "one of 'bar', 'baz'"], + arg1: ExtensionsAnnotated[int, "foo"], + arg2: ExtensionsAnnotated[Literal["bar", "baz"], "one of 'bar', 'baz'"], ) -> None: """dummy function""" pass @@ -55,9 +83,9 @@ def function() -> Callable: @pytest.fixture() def runnable() -> Runnable: - class Args(TypedDict): - arg1: Annotated[int, "foo"] - arg2: Annotated[Literal["bar", "baz"], "one of 'bar', 'baz'"] + class Args(ExtensionsTypedDict): + arg1: ExtensionsAnnotated[int, "foo"] + arg2: ExtensionsAnnotated[Literal["bar", "baz"], "one of 'bar', 'baz'"] def dummy_function(input_dict: Args) -> None: pass @@ -106,6 +134,60 @@ def dummy_pydantic_v2() -> Type[BaseModelV2Maybe]: return dummy_function +@pytest.fixture() +def dummy_typing_typed_dict() -> Type: + class dummy_function(TypingTypedDict): + """dummy function""" + + arg1: TypingAnnotated[int, ..., "foo"] # noqa: F821 + arg2: TypingAnnotated[Literal["bar", "baz"], ..., "one of 'bar', 'baz'"] # noqa: F722 + + return dummy_function + + +@pytest.fixture() +def dummy_typing_typed_dict_docstring() -> Type: + class dummy_function(TypingTypedDict): + """dummy function + + Args: + arg1: foo + arg2: one of 'bar', 'baz' + """ + + arg1: int + arg2: Literal["bar", "baz"] + + return dummy_function + + +@pytest.fixture() +def dummy_extensions_typed_dict() -> Type: + class dummy_function(ExtensionsTypedDict): + """dummy function""" + + arg1: ExtensionsAnnotated[int, ..., "foo"] + arg2: ExtensionsAnnotated[Literal["bar", "baz"], ..., "one of 'bar', 'baz'"] + + return dummy_function + + +@pytest.fixture() +def dummy_extensions_typed_dict_docstring() -> Type: + class dummy_function(ExtensionsTypedDict): + """dummy function + + Args: + arg1: foo + arg2: one of 'bar', 'baz' + """ + + arg1: int + arg2: Literal["bar", "baz"] + + return dummy_function + + @pytest.fixture() def json_schema() -> 
Dict: return { @@ -152,9 +234,13 @@ def test_convert_to_openai_function( function: Callable, dummy_tool: BaseTool, json_schema: Dict, - annotated_function: Callable, + Annotated_function: Callable, dummy_pydantic: Type[BaseModel], runnable: Runnable, + dummy_typing_typed_dict: Type, + dummy_typing_typed_dict_docstring: Type, + dummy_extensions_typed_dict: Type, + dummy_extensions_typed_dict_docstring: Type, ) -> None: expected = { "name": "dummy_function", @@ -181,8 +267,12 @@ expected, Dummy.dummy_function, DummyWithClassMethod.dummy_function, - annotated_function, + Annotated_function, dummy_pydantic, + dummy_typing_typed_dict, + dummy_typing_typed_dict_docstring, + dummy_extensions_typed_dict, + dummy_extensions_typed_dict_docstring, ): actual = convert_to_openai_function(fn) # type: ignore assert actual == expected @@ -356,3 +446,259 @@ def test_tool_outputs() -> None: }, ] assert messages[2].content == "Output1" + + +@pytest.mark.parametrize("use_extension_typed_dict", [True, False]) +@pytest.mark.parametrize("use_extension_annotated", [True, False]) +def test__convert_typed_dict_to_openai_function( + use_extension_typed_dict: bool, use_extension_annotated: bool +) -> None: + if use_extension_typed_dict: + TypedDict = ExtensionsTypedDict + else: + TypedDict = TypingTypedDict + if use_extension_annotated: + Annotated = ExtensionsAnnotated + else: + Annotated = TypingAnnotated + + class SubTool(TypedDict): + """Subtool docstring""" + + args: Annotated[Dict[str, Any], {}, "this does bar"] # noqa: F722 # type: ignore + + class Tool(TypedDict): + """Docstring + + Args: + arg1: foo + """ + + arg1: str + arg2: Union[int, str, bool] + arg3: Optional[List[SubTool]] + arg4: Annotated[Literal["bar", "baz"], ..., "this does foo"] # noqa: F722 + arg5: Annotated[Optional[float], None] + arg6: Annotated[ + Optional[Sequence[Mapping[str, Tuple[Iterable[Any], SubTool]]]], [] + ] + arg7: Annotated[List[SubTool], ...] + arg8: Annotated[Tuple[SubTool], ...] + arg9: Annotated[Sequence[SubTool], ...] + arg10: Annotated[Iterable[SubTool], ...] + arg11: Annotated[Set[SubTool], ...] + arg12: Annotated[Dict[str, SubTool], ...] + arg13: Annotated[Mapping[str, SubTool], ...] + arg14: Annotated[MutableMapping[str, SubTool], ...] 
+ arg15: Annotated[bool, False, "flag"] # noqa: F821 # type: ignore + + expected = { + "name": "Tool", + "description": "Docstring", + "parameters": { + "type": "object", + "properties": { + "arg1": {"description": "foo", "type": "string"}, + "arg2": { + "anyOf": [ + {"type": "integer"}, + {"type": "string"}, + {"type": "boolean"}, + ] + }, + "arg3": { + "type": "array", + "items": { + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + }, + }, + "arg4": { + "description": "this does foo", + "enum": ["bar", "baz"], + "type": "string", + }, + "arg5": {"type": "number"}, + "arg6": { + "default": [], + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": [ + {"type": "array", "items": {}}, + { + "title": "SubTool", + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "title": "Args", + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + }, + ], + }, + }, + }, + "arg7": { + "type": "array", + "items": { + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + }, + }, + "arg8": { + "type": "array", + "minItems": 1, + "maxItems": 1, + "items": [ + { + "title": "SubTool", + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "title": "Args", + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + } + ], + }, + "arg9": { + "type": "array", + "items": { + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + }, + }, + "arg10": { + "type": "array", + "items": { + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + }, + }, + "arg11": { + "type": "array", + "items": { + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + }, + "uniqueItems": True, + }, + "arg12": { + "type": "object", + "additionalProperties": { + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + }, + }, + "arg13": { + "type": "object", + "additionalProperties": { + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + }, + }, + "arg14": { + "type": "object", + "additionalProperties": { + "description": "Subtool docstring", + "type": "object", + "properties": { + "args": { + "description": "this does bar", + "default": {}, + "type": "object", + } + }, + }, + }, + "arg15": {"description": "flag", "default": False, "type": "boolean"}, + }, + "required": [ + "arg1", + "arg2", + "arg3", + "arg4", + "arg7", + "arg8", + "arg9", + "arg10", + "arg11", + "arg12", + "arg13", + "arg14", + ], + }, + } + actual = _convert_typed_dict_to_openai_function(Tool) + assert actual == expected + + +@pytest.mark.parametrize("typed_dict", [ExtensionsTypedDict, TypingTypedDict]) +def test__convert_typed_dict_to_openai_function_fail(typed_dict: Type) -> 
None: + class Tool(typed_dict): + arg1: MutableSet # Pydantic doesn't support MutableSet + + with pytest.raises(TypeError): + _convert_typed_dict_to_openai_function(Tool) diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index 8b21e374e3e..fc5cd43fbe3 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -782,7 +782,7 @@ def bind_tools( self, - tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], + tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]], *, tool_choice: Optional[ Union[Dict[str, str], Literal["any", "auto"], str] ] = None, **kwargs: Any, ) -> Runnable[LanguageModelInput, BaseMessage]: @@ -793,19 +793,19 @@ Args: tools: A list of tool definitions to bind to this chat model. - Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic - models, callables, and BaseTools will be automatically converted to - their schema dictionary representation. + Supports Anthropic format tool schemas and any tool definition handled + by :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`. tool_choice: Which tool to require the model to call. Options are: - name of the tool (str): calls corresponding tool; - "auto" or None: automatically selects a tool (including no tool); - "any": force at least one tool to be called; - or a dict of the form: - {"type": "tool", "name": "tool_name"}, - or {"type: "any"}, - or {"type: "auto"}; - **kwargs: Any additional parameters to bind. + - name of the tool (str): calls corresponding tool; + - ``"auto"`` or None: automatically selects a tool (including no tool); + - ``"any"``: force at least one tool to be called; + - or a dict of the form: + ``{"type": "tool", "name": "tool_name"}``, + or ``{"type": "any"}``, + or ``{"type": "auto"}``; + kwargs: Any additional parameters are passed directly to + ``self.bind(**kwargs)``. Example: .. code-block:: python @@ -905,11 +905,26 @@ """Model wrapper that returns outputs formatted to match the given schema. Args: - schema: The output schema as a dict or a Pydantic class. If a Pydantic class - then the model output will be an object of that class. If a dict then - the model output will be a dict. With a Pydantic class the returned - attributes will be validated, whereas with a dict they will not be. - include_raw: If False then only the parsed structured output is returned. If + schema: + The output schema. Can be passed in as: + - an Anthropic tool schema, + - an OpenAI function/tool schema, + - a JSON Schema, + - a TypedDict class (support added in 0.1.22), + - or a Pydantic class. + If ``schema`` is a Pydantic class then the model output will be a + Pydantic instance of that class, and the model-generated fields will be + validated by the Pydantic class. Otherwise the model output will be a + dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool` + for more on how to properly specify types and descriptions of + schema fields when specifying a Pydantic or TypedDict class. + + .. versionchanged:: 0.1.22 + + Added support for TypedDict class. + + include_raw: + If False then only the parsed structured output is returned. If an error occurs during model output parsing it will be raised. If True then both the raw model response (a BaseMessage) and the parsed model response will be returned. 
If an error occurs during output parsing it
@@ -917,17 +932,17 @@ class ChatAnthropic(BaseChatModel):
                with keys "raw", "parsed", and "parsing_error".

        Returns:
-            A Runnable that takes any ChatModel input. The output type depends on
-            include_raw and schema.
+            A Runnable that takes the same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

-            If include_raw is True then output is a dict with keys:
-                raw: BaseMessage,
-                parsed: Optional[_DictOrPydantic],
-                parsing_error: Optional[BaseException],
+            If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs
+            an instance of ``schema`` (i.e., a Pydantic object).

-            If include_raw is False and schema is a Dict then the runnable outputs a Dict.
-            If include_raw is False and schema is a Type[BaseModel] then the runnable
-            outputs a BaseModel.
+            Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
+
+            If ``include_raw`` is True, then Runnable outputs a dict with keys:
+                - ``"raw"``: BaseMessage
+                - ``"parsed"``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+                - ``"parsing_error"``: Optional[BaseException]

        Example: Pydantic schema (include_raw=False):
            .. code-block:: python
@@ -1032,7 +1047,7 @@ class AnthropicTool(TypedDict):

 def convert_to_anthropic_tool(
-    tool: Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool],
+    tool: Union[Dict[str, Any], Type, Callable, BaseTool],
 ) -> AnthropicTool:
     """Convert a tool-like object to an Anthropic tool definition."""
     # already in Anthropic tool format
diff --git a/libs/partners/fireworks/langchain_fireworks/chat_models.py b/libs/partners/fireworks/langchain_fireworks/chat_models.py
index 0a2c30e7eb1..9fc9402e8ee 100644
--- a/libs/partners/fireworks/langchain_fireworks/chat_models.py
+++ b/libs/partners/fireworks/langchain_fireworks/chat_models.py
@@ -688,9 +688,8 @@ class ChatFireworks(BaseChatModel):

        Args:
            tools: A list of tool definitions to bind to this chat model.
-                Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic
-                models, callables, and BaseTools will be automatically converted to
-                their schema dictionary representation.
+                Supports any tool definition handled by
+                :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`.
            tool_choice: Which tool to require the model to call.
                Must be the name of the single provided function,
                "auto" to automatically determine which function to call
@@ -746,19 +745,32 @@ class ChatFireworks(BaseChatModel):
        """Model wrapper that returns outputs formatted to match the given schema.

        Args:
-            schema: The output schema as a dict or a Pydantic class. If a Pydantic class
-                then the model output will be an object of that class. If a dict then
-                the model output will be a dict. With a Pydantic class the returned
-                attributes will be validated, whereas with a dict they will not be. If
-                `method` is "function_calling" and `schema` is a dict, then the dict
-                must match the Fireworks function-calling spec.
-            method: The method for steering model generation, either "function_calling"
+            schema:
+                The output schema. Can be passed in as:
+                    - an OpenAI function/tool schema,
+                    - a JSON Schema,
+                    - a TypedDict class (support added in 0.1.7),
+                    - or a Pydantic class.
+                If ``schema`` is a Pydantic class then the model output will be a
+                Pydantic instance of that class, and the model-generated fields will be
+                validated by the Pydantic class. Otherwise the model output will be a
+                dict and will not be validated.
See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool` + for more on how to properly specify types and descriptions of + schema fields when specifying a Pydantic or TypedDict class. + + .. versionchanged:: 0.1.7 + + Added support for TypedDict class. + + method: + The method for steering model generation, either "function_calling" or "json_mode". If "function_calling" then the schema will be converted - to a Fireworks function and the returned model will make use of the - function-calling API. If "json_mode" then Fireworks's JSON mode will be + to an OpenAI function and the returned model will make use of the + function-calling API. If "json_mode" then OpenAI's JSON mode will be used. Note that if using "json_mode" then you must include instructions for formatting the output into the desired schema into the model call. - include_raw: If False then only the parsed structured output is returned. If + include_raw: + If False then only the parsed structured output is returned. If an error occurs during model output parsing it will be raised. If True then both the raw model response (a BaseMessage) and the parsed model response will be returned. If an error occurs during output parsing it @@ -766,86 +778,139 @@ class ChatFireworks(BaseChatModel): with keys "raw", "parsed", and "parsing_error". Returns: - A Runnable that takes any ChatModel input and returns as output: + A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`. - If include_raw is True then a dict with keys: - raw: BaseMessage - parsed: Optional[_DictOrPydantic] - parsing_error: Optional[BaseException] + If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs + an instance of ``schema`` (i.e., a Pydantic object). - If include_raw is False then just _DictOrPydantic is returned, - where _DictOrPydantic depends on the schema: + Otherwise, if ``include_raw`` is False then Runnable outputs a dict. - If schema is a Pydantic class then _DictOrPydantic is the Pydantic - class. + If ``include_raw`` is True, then Runnable outputs a dict with keys: + - ``"raw"``: BaseMessage + - ``"parsed"``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. + - ``"parsing_error"``: Optional[BaseException] - If schema is a dict then _DictOrPydantic is a dict. - - Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False): + Example: schema=Pydantic class, method="function_calling", include_raw=False: .. code-block:: python + from typing import Optional + from langchain_fireworks import ChatFireworks - from langchain_core.pydantic_v1 import BaseModel + from langchain_core.pydantic_v1 import BaseModel, Field + class AnswerWithJustification(BaseModel): '''An answer to the user question along with justification for the answer.''' + answer: str - justification: str + # If we provide default values and/or descriptions for fields, these will be passed + # to the model. This is an important part of improving a model's ability to + # correctly return structured outputs. + justification: Optional[str] = Field( + default=None, description="A justification for the answer." 
+                )
+

                llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0)
                structured_llm = llm.with_structured_output(AnswerWithJustification)

-                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
+                structured_llm.invoke(
+                    "What weighs more a pound of bricks or a pound of feathers"
+                )

                # -> AnswerWithJustification(
                #     answer='They weigh the same',
                #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
                # )

-        Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True):
+        Example: schema=Pydantic class, method="function_calling", include_raw=True:
            .. code-block:: python

                from langchain_fireworks import ChatFireworks
                from langchain_core.pydantic_v1 import BaseModel

+
                class AnswerWithJustification(BaseModel):
                    '''An answer to the user question along with justification for the answer.'''

+
                    answer: str
                    justification: str

-                llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0)
-                structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True)
-                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
+                llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0)
+                structured_llm = llm.with_structured_output(
+                    AnswerWithJustification, include_raw=True
+                )
+
+                structured_llm.invoke(
+                    "What weighs more a pound of bricks or a pound of feathers"
+                )

                # -> {
                #     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
                #     'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
                #     'parsing_error': None
                # }

-        Example: Function-calling, dict schema (method="function_calling", include_raw=False):
+        Example: schema=TypedDict class, method="function_calling", include_raw=False:
            .. code-block:: python

+                # IMPORTANT: If you are using Python <=3.8, you need to import Annotated
+                # from typing_extensions, not from typing.
+                from typing import Optional
+                from typing_extensions import Annotated, TypedDict
+
                from langchain_fireworks import ChatFireworks
-                from langchain_core.pydantic_v1 import BaseModel
-                from langchain_core.utils.function_calling import convert_to_openai_tool

-                class AnswerWithJustification(BaseModel):
+
+                class AnswerWithJustification(TypedDict):
                    '''An answer to the user question along with justification for the answer.'''

+
                    answer: str
-                    justification: str
+                    justification: Annotated[
+                        Optional[str], None, "A justification for the answer."
+ ] + - dict_schema = convert_to_openai_tool(AnswerWithJustification) llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0) - structured_llm = llm.with_structured_output(dict_schema) + structured_llm = llm.with_structured_output(AnswerWithJustification) - structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers") + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) # -> { # 'answer': 'They weigh the same', # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' # } - Example: JSON mode, Pydantic schema (method="json_mode", include_raw=True): + Example: schema=OpenAI function schema, method="function_calling", include_raw=False: + .. code-block:: python + + from langchain_fireworks import ChatFireworks + + oai_schema = { + 'name': 'AnswerWithJustification', + 'description': 'An answer to the user question along with justification for the answer.', + 'parameters': { + 'type': 'object', + 'properties': { + 'answer': {'type': 'string'}, + 'justification': {'description': 'A justification for the answer.', 'type': 'string'} + }, + 'required': ['answer'] + } + } + + llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0) + structured_llm = llm.with_structured_output(oai_schema) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'answer': 'They weigh the same', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' + # } + + Example: schema=Pydantic class, method="json_mode", include_raw=True: .. code-block:: from langchain_fireworks import ChatFireworks @@ -873,12 +938,9 @@ class ChatFireworks(BaseChatModel): # 'parsing_error': None # } - Example: JSON mode, no schema (schema=None, method="json_mode", include_raw=True): + Example: schema=None, method="json_mode", include_raw=True: .. code-block:: - from langchain_fireworks import ChatFireworks - - llm = ChatFireworks(model="accounts/fireworks/models/firefunction-v1", temperature=0) structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) structured_llm.invoke( @@ -894,8 +956,6 @@ class ChatFireworks(BaseChatModel): # }, # 'parsing_error': None # } - - """ # noqa: E501 if kwargs: raise ValueError(f"Received unsupported arguments {kwargs}") diff --git a/libs/partners/groq/langchain_groq/chat_models.py b/libs/partners/groq/langchain_groq/chat_models.py index c7bed62bb80..fe506407172 100644 --- a/libs/partners/groq/langchain_groq/chat_models.py +++ b/libs/partners/groq/langchain_groq/chat_models.py @@ -789,9 +789,8 @@ class ChatGroq(BaseChatModel): Args: tools: A list of tool definitions to bind to this chat model. - Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic - models, callables, and BaseTools will be automatically converted to - their schema dictionary representation. + Supports any tool definition handled by + :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`. tool_choice: Which tool to require the model to call. Must be the name of the single provided function, "auto" to automatically determine which function to call @@ -856,19 +855,31 @@ class ChatGroq(BaseChatModel): """Model wrapper that returns outputs formatted to match the given schema. 
        Args:
-            schema: The output schema as a dict or a Pydantic class. If a Pydantic class
-                then the model output will be an object of that class. If a dict then
-                the model output will be a dict. With a Pydantic class the returned
-                attributes will be validated, whereas with a dict they will not be. If
-                `method` is "function_calling" and `schema` is a dict, then the dict
-                must match the OpenAI function-calling spec.
-            method: The method for steering model generation, either "function_calling"
+            schema:
+                The output schema. Can be passed in as:
+                    - an OpenAI function/tool schema,
+                    - a JSON Schema,
+                    - a TypedDict class (support added in 0.1.9),
+                    - or a Pydantic class.
+                If ``schema`` is a Pydantic class then the model output will be a
+                Pydantic instance of that class, and the model-generated fields will be
+                validated by the Pydantic class. Otherwise the model output will be a
+                dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`
+                for more on how to properly specify types and descriptions of
+                schema fields when specifying a Pydantic or TypedDict class.
+
+                .. versionchanged:: 0.1.9
+
+                        Added support for TypedDict class.
+            method:
+                The method for steering model generation, either "function_calling"
                or "json_mode". If "function_calling" then the schema will be converted
-                to a OpenAI function and the returned model will make use of the
-                function-calling API. If "json_mode" then Groq's JSON mode will be
+                to an OpenAI function and the returned model will make use of the
+                function-calling API. If "json_mode" then OpenAI's JSON mode will be
                used. Note that if using "json_mode" then you must include instructions
                for formatting the output into the desired schema into the model call.
-            include_raw: If False then only the parsed structured output is returned. If
+            include_raw:
+                If False then only the parsed structured output is returned. If
                an error occurs during model output parsing it will be raised. If True
                then both the raw model response (a BaseMessage) and the parsed model
                response will be returned. If an error occurs during output parsing it
@@ -876,85 +887,139 @@ class ChatGroq(BaseChatModel):
                with keys "raw", "parsed", and "parsing_error".

        Returns:
-            A Runnable that takes any ChatModel input and returns as output:
+            A Runnable that takes the same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

-            If include_raw is True then a dict with keys:
-                raw: BaseMessage
-                parsed: Optional[_DictOrPydantic]
-                parsing_error: Optional[BaseException]
+            If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs
+            an instance of ``schema`` (i.e., a Pydantic object).

-            If include_raw is False then just _DictOrPydantic is returned,
-            where _DictOrPydantic depends on the schema:
+            Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

-            If schema is a Pydantic class then _DictOrPydantic is the Pydantic
-            class.
+            If ``include_raw`` is True, then Runnable outputs a dict with keys:
+                - ``"raw"``: BaseMessage
+                - ``"parsed"``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+                - ``"parsing_error"``: Optional[BaseException]

-            If schema is a dict then _DictOrPydantic is a dict.
-
-        Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False):
+        Example: schema=Pydantic class, method="function_calling", include_raw=False:
            ..
code-block:: python + from typing import Optional + from langchain_groq import ChatGroq - from langchain_core.pydantic_v1 import BaseModel + from langchain_core.pydantic_v1 import BaseModel, Field + class AnswerWithJustification(BaseModel): '''An answer to the user question along with justification for the answer.''' - answer: str - justification: str - llm = ChatGroq(temperature=0) + answer: str + # If we provide default values and/or descriptions for fields, these will be passed + # to the model. This is an important part of improving a model's ability to + # correctly return structured outputs. + justification: Optional[str] = Field( + default=None, description="A justification for the answer." + ) + + + llm = ChatGroq(model="llama-3.1-405b-reasoning", temperature=0) structured_llm = llm.with_structured_output(AnswerWithJustification) - structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers") + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> AnswerWithJustification( - # answer='A pound of bricks and a pound of feathers weigh the same.' - # justification="Both a pound of bricks and a pound of feathers have been defined to have the same weight. The 'pound' is a unit of weight, so any two things that are described as weighing a pound will weigh the same." + # answer='They weigh the same', + # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' # ) - Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True): + Example: schema=Pydantic class, method="function_calling", include_raw=True: .. code-block:: python from langchain_groq import ChatGroq from langchain_core.pydantic_v1 import BaseModel + class AnswerWithJustification(BaseModel): '''An answer to the user question along with justification for the answer.''' + answer: str justification: str - llm = ChatGroq(temperature=0) - structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True) - structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers") + llm = ChatGroq(model="llama-3.1-405b-reasoning", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, include_raw=True + ) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) # -> { - # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_01htjn3cspevxbqc1d7nkk8wab', 'function': {'arguments': '{"answer": "A pound of bricks and a pound of feathers weigh the same.", "justification": "Both a pound of bricks and a pound of feathers have been defined to have the same weight. The \'pound\' is a unit of weight, so any two things that are described as weighing a pound will weigh the same.", "unit": "pounds"}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}, id='run-456beee6-65f6-4e80-88af-a6065480822c-0'), - # 'parsed': AnswerWithJustification(answer='A pound of bricks and a pound of feathers weigh the same.', justification="Both a pound of bricks and a pound of feathers have been defined to have the same weight. 
The 'pound' is a unit of weight, so any two things that are described as weighing a pound will weigh the same."),
+                #     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
+                #     'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
                #     'parsing_error': None
                # }

-        Example: Function-calling, dict schema (method="function_calling", include_raw=False):
+        Example: schema=TypedDict class, method="function_calling", include_raw=False:
+            .. code-block:: python
+
+                # IMPORTANT: If you are using Python <=3.8, you need to import Annotated
+                # from typing_extensions, not from typing.
+                from typing import Optional
+                from typing_extensions import Annotated, TypedDict
+
+                from langchain_groq import ChatGroq
+
+
+                class AnswerWithJustification(TypedDict):
+                    '''An answer to the user question along with justification for the answer.'''
+
+                    answer: str
+                    justification: Annotated[
+                        Optional[str], None, "A justification for the answer."
+                    ]
+
+
+                llm = ChatGroq(model="llama-3.1-405b-reasoning", temperature=0)
+                structured_llm = llm.with_structured_output(AnswerWithJustification)
+
+                structured_llm.invoke(
+                    "What weighs more a pound of bricks or a pound of feathers"
+                )
+                # -> {
+                #     'answer': 'They weigh the same',
+                #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
+                # }

+        Example: schema=OpenAI function schema, method="function_calling", include_raw=False:
            .. code-block:: python

                from langchain_groq import ChatGroq
-                from langchain_core.pydantic_v1 import BaseModel
-                from langchain_core.utils.function_calling import convert_to_openai_tool

-                class AnswerWithJustification(BaseModel):
-                    '''An answer to the user question along with justification for the answer.'''
-                    answer: str
-                    justification: str
+                oai_schema = {
+                    'name': 'AnswerWithJustification',
+                    'description': 'An answer to the user question along with justification for the answer.',
+                    'parameters': {
+                        'type': 'object',
+                        'properties': {
+                            'answer': {'type': 'string'},
+                            'justification': {'description': 'A justification for the answer.', 'type': 'string'}
+                        },
+                        'required': ['answer']
+                    }
+                }

-                dict_schema = convert_to_openai_tool(AnswerWithJustification)
-                llm = ChatGroq(temperature=0)
-                structured_llm = llm.with_structured_output(dict_schema)
+                llm = ChatGroq(model="llama-3.1-405b-reasoning", temperature=0)
+                structured_llm = llm.with_structured_output(oai_schema)

-                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
+                structured_llm.invoke(
+                    "What weighs more a pound of bricks or a pound of feathers"
+                )
                # -> {
-                #     'answer': 'A pound of bricks and a pound of feathers weigh the same.',
-                #     'justification': "Both a pound of bricks and a pound of feathers have been defined to have the same weight. The 'pound' is a unit of weight, so any two things that are described as weighing a pound will weigh the same.", 'unit': 'pounds'}
+                #     'answer': 'They weigh the same',
+                #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound.
The weight is the same, but the volume and density of the two substances differ.' # } - Example: JSON mode, Pydantic schema (method="json_mode", include_raw=True): + Example: schema=Pydantic class, method="json_mode", include_raw=True: .. code-block:: from langchain_groq import ChatGroq @@ -964,7 +1029,7 @@ class ChatGroq(BaseChatModel): answer: str justification: str - llm = ChatGroq(temperature=0) + llm = ChatGroq(model="llama-3.1-405b-reasoning", temperature=0) structured_llm = llm.with_structured_output( AnswerWithJustification, method="json_mode", @@ -977,17 +1042,14 @@ class ChatGroq(BaseChatModel): "What's heavier a pound of bricks or a pound of feathers?" ) # -> { - # 'raw': AIMessage(content='{\n "answer": "A pound of bricks is the same weight as a pound of feathers.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The material being weighed does not affect the weight, only the volume or number of items being weighed."\n}', id='run-e5453bc5-5025-4833-95f9-4967bf6d5c4f-0'), - # 'parsed': AnswerWithJustification(answer='A pound of bricks is the same weight as a pound of feathers.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The material being weighed does not affect the weight, only the volume or number of items being weighed.'), + # 'raw': AIMessage(content='{\n "answer": "They are both the same weight.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \n}'), + # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'), # 'parsing_error': None # } - Example: JSON mode, no schema (schema=None, method="json_mode", include_raw=True): + Example: schema=None, method="json_mode", include_raw=True: .. code-block:: - from langchain_groq import ChatGroq - - llm = ChatGroq(temperature=0) structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) structured_llm.invoke( @@ -996,14 +1058,13 @@ class ChatGroq(BaseChatModel): "What's heavier a pound of bricks or a pound of feathers?" ) # -> { - # 'raw': AIMessage(content='{\n "answer": "A pound of bricks is the same weight as a pound of feathers.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The material doesn\'t change the weight, only the volume or space that the material takes up."\n}', id='run-a4abbdb6-c20e-456f-bfff-da906a7e76b5-0'), + # 'raw': AIMessage(content='{\n "answer": "They are both the same weight.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \n}'), # 'parsed': { - # 'answer': 'A pound of bricks is the same weight as a pound of feathers.', - # 'justification': "Both a pound of bricks and a pound of feathers weigh one pound. The material doesn't change the weight, only the volume or space that the material takes up."}, + # 'answer': 'They are both the same weight.', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.' 
+ # }, # 'parsing_error': None # } - - """ # noqa: E501 if kwargs: raise ValueError(f"Received unsupported arguments {kwargs}") diff --git a/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py b/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py index faaf793ddd0..b2fe14e8d41 100644 --- a/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py +++ b/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py @@ -29,7 +29,7 @@ from langchain_core.messages import ( ToolMessage, ) from langchain_core.outputs import ChatGeneration, ChatResult, LLMResult -from langchain_core.pydantic_v1 import BaseModel, root_validator +from langchain_core.pydantic_v1 import root_validator from langchain_core.runnables import Runnable from langchain_core.tools import BaseTool from langchain_core.utils.function_calling import convert_to_openai_tool @@ -469,7 +469,7 @@ class ChatHuggingFace(BaseChatModel): def bind_tools( self, - tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], + tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]], *, tool_choice: Optional[Union[dict, str, Literal["auto", "none"], bool]] = None, **kwargs: Any, @@ -480,9 +480,8 @@ class ChatHuggingFace(BaseChatModel): Args: tools: A list of tool definitions to bind to this chat model. - Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic - models, callables, and BaseTools will be automatically converted to - their schema dictionary representation. + Supports any tool definition handled by + :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`. tool_choice: Which tool to require the model to call. Must be the name of the single provided function or "auto" to automatically determine which function to call diff --git a/libs/partners/mistralai/langchain_mistralai/chat_models.py b/libs/partners/mistralai/langchain_mistralai/chat_models.py index 9b3fb816bbd..31739853f0d 100644 --- a/libs/partners/mistralai/langchain_mistralai/chat_models.py +++ b/libs/partners/mistralai/langchain_mistralai/chat_models.py @@ -638,7 +638,7 @@ class ChatMistralAI(BaseChatModel): def bind_tools( self, - tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], + tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]], **kwargs: Any, ) -> Runnable[LanguageModelInput, BaseMessage]: """Bind tool-like objects to this chat model. @@ -647,16 +647,15 @@ class ChatMistralAI(BaseChatModel): Args: tools: A list of tool definitions to bind to this chat model. - Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic - models, callables, and BaseTools will be automatically converted to - their schema dictionary representation. + Supports any tool definition handled by + :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`. tool_choice: Which tool to require the model to call. Must be the name of the single provided function or "auto" to automatically determine which function to call (if any), or a dict of the form: {"type": "function", "function": {"name": <>}}. - **kwargs: Any additional parameters to pass to the - :class:`~langchain.runnable.Runnable` constructor. + kwargs: Any additional parameters are passed directly to + ``self.bind(**kwargs)``. 
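One concrete illustration could be appended to this ``bind_tools`` docstring before it closes. The following is a sketch only, not part of the original change: the ``multiply`` function is hypothetical, and the output shown is what ``convert_to_openai_tool`` (the helper the docstring already names) produces for a plain callable:

+
+        Example (illustrative sketch):
+            .. code-block:: python
+
+                from langchain_core.utils.function_calling import convert_to_openai_tool
+
+                def multiply(a: int, b: int) -> int:
+                    '''Multiply two integers.'''
+                    return a * b
+
+                # bind_tools applies this same normalization to each element of ``tools``.
+                convert_to_openai_tool(multiply)
+                # -> {'type': 'function',
+                #     'function': {'name': 'multiply',
+                #                  'description': 'Multiply two integers.',
+                #                  'parameters': {'type': 'object',
+                #                                 'properties': {'a': {'type': 'integer'},
+                #                                                'b': {'type': 'integer'}},
+                #                                 'required': ['a', 'b']}}}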
""" formatted_tools = [convert_to_openai_tool(tool) for tool in tools] @@ -664,7 +663,7 @@ class ChatMistralAI(BaseChatModel): def with_structured_output( self, - schema: Optional[Union[Dict, Type[BaseModel]]] = None, + schema: Optional[Union[Dict, Type]] = None, *, method: Literal["function_calling", "json_mode"] = "function_calling", include_raw: bool = False, @@ -673,19 +672,32 @@ class ChatMistralAI(BaseChatModel): """Model wrapper that returns outputs formatted to match the given schema. Args: - schema: The output schema as a dict or a Pydantic class. If a Pydantic class - then the model output will be an object of that class. If a dict then - the model output will be a dict. With a Pydantic class the returned - attributes will be validated, whereas with a dict they will not be. If - `method` is "function_calling" and `schema` is a dict, then the dict - must match the OpenAI function-calling spec. - method: The method for steering model generation, either "function_calling" + schema: + The output schema. Can be passed in as: + - an OpenAI function/tool schema, + - a JSON Schema, + - a TypedDict class (support added in 0.1.12), + - or a Pydantic class. + If ``schema`` is a Pydantic class then the model output will be a + Pydantic instance of that class, and the model-generated fields will be + validated by the Pydantic class. Otherwise the model output will be a + dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool` + for more on how to properly specify types and descriptions of + schema fields when specifying a Pydantic or TypedDict class. + + .. versionchanged:: 0.1.12 + + Added support for TypedDict class. + + method: + The method for steering model generation, either "function_calling" or "json_mode". If "function_calling" then the schema will be converted to an OpenAI function and the returned model will make use of the function-calling API. If "json_mode" then OpenAI's JSON mode will be used. Note that if using "json_mode" then you must include instructions for formatting the output into the desired schema into the model call. - include_raw: If False then only the parsed structured output is returned. If + include_raw: + If False then only the parsed structured output is returned. If an error occurs during model output parsing it will be raised. If True then both the raw model response (a BaseMessage) and the parsed model response will be returned. If an error occurs during output parsing it @@ -693,86 +705,139 @@ class ChatMistralAI(BaseChatModel): with keys "raw", "parsed", and "parsing_error". Returns: - A Runnable that takes any ChatModel input and returns as output: + A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`. - If include_raw is True then a dict with keys: - raw: BaseMessage - parsed: Optional[_DictOrPydantic] - parsing_error: Optional[BaseException] + If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs + an instance of ``schema`` (i.e., a Pydantic object). - If include_raw is False then just _DictOrPydantic is returned, - where _DictOrPydantic depends on the schema: + Otherwise, if ``include_raw`` is False then Runnable outputs a dict. - If schema is a Pydantic class then _DictOrPydantic is the Pydantic - class. + If ``include_raw`` is True, then Runnable outputs a dict with keys: + - ``"raw"``: BaseMessage + - ``"parsed"``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. 
+ - ``"parsing_error"``: Optional[BaseException] - If schema is a dict then _DictOrPydantic is a dict. - - Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False): + Example: schema=Pydantic class, method="function_calling", include_raw=False: .. code-block:: python + from typing import Optional + from langchain_mistralai import ChatMistralAI - from langchain_core.pydantic_v1 import BaseModel + from langchain_core.pydantic_v1 import BaseModel, Field + class AnswerWithJustification(BaseModel): '''An answer to the user question along with justification for the answer.''' + answer: str - justification: str + # If we provide default values and/or descriptions for fields, these will be passed + # to the model. This is an important part of improving a model's ability to + # correctly return structured outputs. + justification: Optional[str] = Field( + default=None, description="A justification for the answer." + ) + llm = ChatMistralAI(model="mistral-large-latest", temperature=0) structured_llm = llm.with_structured_output(AnswerWithJustification) - structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers") + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) # -> AnswerWithJustification( # answer='They weigh the same', # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' # ) - Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True): + Example: schema=Pydantic class, method="function_calling", include_raw=True: .. code-block:: python from langchain_mistralai import ChatMistralAI from langchain_core.pydantic_v1 import BaseModel + class AnswerWithJustification(BaseModel): '''An answer to the user question along with justification for the answer.''' + answer: str justification: str - llm = ChatMistralAI(model="mistral-large-latest", temperature=0) - structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True) - structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers") + llm = ChatMistralAI(model="mistral-large-latest", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, include_raw=True + ) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) # -> { # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}), # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'), # 'parsing_error': None # } - Example: Function-calling, dict schema (method="function_calling", include_raw=False): + Example: schema=TypedDict class, method="function_calling", include_raw=False: .. code-block:: python + # IMPORTANT: If you are using Python <=3.8, you need to import Annotated + # from typing_extensions, not from typing. 
+                from typing import Optional
+
+                from typing_extensions import Annotated, TypedDict
+
                from langchain_mistralai import ChatMistralAI
-                from langchain_core.pydantic_v1 import BaseModel
-                from langchain_core.utils.function_calling import convert_to_openai_tool

-                class AnswerWithJustification(BaseModel):
+
+                class AnswerWithJustification(TypedDict):
                    '''An answer to the user question along with justification for the answer.'''

+
                    answer: str
-                    justification: str
+                    justification: Annotated[
+                        Optional[str], None, "A justification for the answer."
+                    ]
+

-                dict_schema = convert_to_openai_tool(AnswerWithJustification)
                llm = ChatMistralAI(model="mistral-large-latest", temperature=0)
-                structured_llm = llm.with_structured_output(dict_schema)
+                structured_llm = llm.with_structured_output(AnswerWithJustification)

-                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
+                structured_llm.invoke(
+                    "What weighs more a pound of bricks or a pound of feathers"
+                )
                # -> {
                #     'answer': 'They weigh the same',
                #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                # }

-        Example: JSON mode, Pydantic schema (method="json_mode", include_raw=True):
+        Example: schema=OpenAI function schema, method="function_calling", include_raw=False:
+            .. code-block:: python
+
+                from langchain_mistralai import ChatMistralAI
+
+                oai_schema = {
+                    'name': 'AnswerWithJustification',
+                    'description': 'An answer to the user question along with justification for the answer.',
+                    'parameters': {
+                        'type': 'object',
+                        'properties': {
+                            'answer': {'type': 'string'},
+                            'justification': {'description': 'A justification for the answer.', 'type': 'string'}
+                        },
+                        'required': ['answer']
+                    }
+                }
+
+                llm = ChatMistralAI(model="mistral-large-latest", temperature=0)
+                structured_llm = llm.with_structured_output(oai_schema)
+
+                structured_llm.invoke(
+                    "What weighs more a pound of bricks or a pound of feathers"
+                )
+                # -> {
+                #     'answer': 'They weigh the same',
+                #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
+                # }
+
+        Example: schema=Pydantic class, method="json_mode", include_raw=True:
            .. code-block::

                from langchain_mistralai import ChatMistralAI
@@ -800,11 +865,9 @@ class ChatMistralAI(BaseChatModel):
                #     'parsing_error': None
                # }

-        Example: JSON mode, no schema (schema=None, method="json_mode", include_raw=True):
+        Example: schema=None, method="json_mode", include_raw=True:
            ..
code-block:: - from langchain_mistralai import ChatMistralAI - structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) structured_llm.invoke( diff --git a/libs/partners/ollama/langchain_ollama/chat_models.py b/libs/partners/ollama/langchain_ollama/chat_models.py index 2d3acc35e8d..04567e2f8c7 100644 --- a/libs/partners/ollama/langchain_ollama/chat_models.py +++ b/libs/partners/ollama/langchain_ollama/chat_models.py @@ -35,7 +35,6 @@ from langchain_core.messages import ( from langchain_core.messages.ai import UsageMetadata from langchain_core.messages.tool import tool_call from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult -from langchain_core.pydantic_v1 import BaseModel from langchain_core.runnables import Runnable from langchain_core.tools import BaseTool from langchain_core.utils.function_calling import convert_to_openai_tool @@ -723,8 +722,19 @@ class ChatOllama(BaseChatModel): def bind_tools( self, - tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], + tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]], **kwargs: Any, ) -> Runnable[LanguageModelInput, BaseMessage]: + """Bind tool-like objects to this chat model. + + Assumes model is compatible with OpenAI tool-calling API. + + Args: + tools: A list of tool definitions to bind to this chat model. + Supports any tool definition handled by + :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`. + kwargs: Any additional parameters are passed directly to + ``self.bind(**kwargs)``. + """ # noqa: E501 formatted_tools = [convert_to_openai_tool(tool) for tool in tools] return super().bind(tools=formatted_tools, **kwargs) diff --git a/libs/partners/openai/langchain_openai/chat_models/azure.py b/libs/partners/openai/langchain_openai/chat_models/azure.py index b06b5b07981..eaf31a56a33 100644 --- a/libs/partners/openai/langchain_openai/chat_models/azure.py +++ b/libs/partners/openai/langchain_openai/chat_models/azure.py @@ -652,7 +652,7 @@ class AzureChatOpenAI(BaseChatOpenAI): def bind_tools( self, - tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], + tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]], *, tool_choice: Optional[ Union[dict, str, Literal["auto", "none", "required", "any"], bool] @@ -703,20 +703,27 @@ class AzureChatOpenAI(BaseChatOpenAI): """Model wrapper that returns outputs formatted to match the given schema. Args: - schema: The output schema as a dict or a Pydantic class. If a Pydantic class - then the model output will be an object of that class. If a dict then - the model output will be a dict. With a Pydantic class the returned - attributes will be validated, whereas with a dict they will not be. If - `method` is "function_calling" and `schema` is a dict, then the dict - must match the OpenAI function-calling spec or be a valid JSON schema - with top level 'title' and 'description' keys specified. - method: The method for steering model generation, either "function_calling" + schema: + The output schema. Can be passed in as: + - an OpenAI function/tool schema, + - a JSON Schema, + - a TypedDict class, + - or a Pydantic class. + If ``schema`` is a Pydantic class then the model output will be a + Pydantic instance of that class, and the model-generated fields will be + validated by the Pydantic class. Otherwise the model output will be a + dict and will not be validated. 
See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`
+                for more on how to properly specify types and descriptions of
+                schema fields when specifying a Pydantic or TypedDict class.
+            method:
+                The method for steering model generation, either "function_calling"
                or "json_mode". If "function_calling" then the schema will be converted
                to an OpenAI function and the returned model will make use of the
                function-calling API. If "json_mode" then OpenAI's JSON mode will be
                used. Note that if using "json_mode" then you must include instructions
                for formatting the output into the desired schema into the model call.
-            include_raw: If False then only the parsed structured output is returned. If
+            include_raw:
+                If False then only the parsed structured output is returned. If
                an error occurs during model output parsing it will be raised. If True
                then both the raw model response (a BaseMessage) and the parsed model
                response will be returned. If an error occurs during output parsing it
@@ -724,36 +731,40 @@ class AzureChatOpenAI(BaseChatOpenAI):
                with keys "raw", "parsed", and "parsing_error".

        Returns:
-            A Runnable that takes any ChatModel input and returns as output:
+            A Runnable that takes the same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

-            If include_raw is True then a dict with keys:
-                raw: BaseMessage
-                parsed: Optional[_DictOrPydantic]
-                parsing_error: Optional[BaseException]
+            If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs
+            an instance of ``schema`` (i.e., a Pydantic object).

-            If include_raw is False then just _DictOrPydantic is returned,
-            where _DictOrPydantic depends on the schema:
+            Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

-            If schema is a Pydantic class then _DictOrPydantic is the Pydantic
-            class.
+            If ``include_raw`` is True, then Runnable outputs a dict with keys:
+                - ``"raw"``: BaseMessage
+                - ``"parsed"``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+                - ``"parsing_error"``: Optional[BaseException]

-            If schema is a dict then _DictOrPydantic is a dict.
-
-        Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False):
+        Example: schema=Pydantic class, method="function_calling", include_raw=False:
            .. code-block:: python

+                from typing import Optional
+
                from langchain_openai import AzureChatOpenAI
-                from langchain_core.pydantic_v1 import BaseModel
+                from langchain_core.pydantic_v1 import BaseModel, Field

                class AnswerWithJustification(BaseModel):
                    '''An answer to the user question along with justification for the answer.'''

                    answer: str
-                    justification: str
+                    # If we provide default values and/or descriptions for fields, these will be passed
+                    # to the model. This is an important part of improving a model's ability to
+                    # correctly return structured outputs.
+                    justification: Optional[str] = Field(
+                        default=None, description="A justification for the answer."
+                    )

-                llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo", temperature=0)
+                llm = AzureChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
                structured_llm = llm.with_structured_output(AnswerWithJustification)

                structured_llm.invoke(
@@ -765,7 +776,7 @@ class AzureChatOpenAI(BaseChatOpenAI):
                #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
                # )

-        Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True):
+        Example: schema=Pydantic class, method="function_calling", include_raw=True:
            .. code-block:: python

                from langchain_openai import AzureChatOpenAI
@@ -779,7 +790,7 @@ class AzureChatOpenAI(BaseChatOpenAI):
                    justification: str

-                llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo", temperature=0)
+                llm = AzureChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
                structured_llm = llm.with_structured_output(
                    AnswerWithJustification, include_raw=True
                )
@@ -793,24 +804,27 @@ class AzureChatOpenAI(BaseChatOpenAI):
                #     'parsing_error': None
                # }

-        Example: Function-calling, dict schema (method="function_calling", include_raw=False):
+        Example: schema=TypedDict class, method="function_calling", include_raw=False:
            .. code-block:: python

+                # IMPORTANT: If you are using Python <=3.8, you need to import Annotated
+                # from typing_extensions, not from typing.
+                from typing import Optional
+                from typing_extensions import Annotated, TypedDict
+
                from langchain_openai import AzureChatOpenAI
-                from langchain_core.pydantic_v1 import BaseModel
-                from langchain_core.utils.function_calling import convert_to_openai_tool

-                class AnswerWithJustification(BaseModel):
+                class AnswerWithJustification(TypedDict):
                    '''An answer to the user question along with justification for the answer.'''

                    answer: str
-                    justification: str
+                    justification: Annotated[
+                        Optional[str], None, "A justification for the answer."
+                    ]

-                dict_schema = convert_to_openai_tool(AnswerWithJustification)
-                llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo", temperature=0)
-                structured_llm = llm.with_structured_output(dict_schema)
+                llm = AzureChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
+                structured_llm = llm.with_structured_output(AnswerWithJustification)

                structured_llm.invoke(
                    "What weighs more a pound of bricks or a pound of feathers"
@@ -820,7 +834,36 @@ class AzureChatOpenAI(BaseChatOpenAI):
                #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                # }

-        Example: JSON mode, Pydantic schema (method="json_mode", include_raw=True):
+        Example: schema=OpenAI function schema, method="function_calling", include_raw=False:
+            .. code-block:: python
+
+                from langchain_openai import AzureChatOpenAI
+
+                oai_schema = {
+                    'name': 'AnswerWithJustification',
+                    'description': 'An answer to the user question along with justification for the answer.',
+                    'parameters': {
+                        'type': 'object',
+                        'properties': {
+                            'answer': {'type': 'string'},
+                            'justification': {'description': 'A justification for the answer.', 'type': 'string'}
+                        },
+                        'required': ['answer']
+                    }
+                }
+
+                llm = AzureChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
+                structured_llm = llm.with_structured_output(oai_schema)
+
+                structured_llm.invoke(
+                    "What weighs more a pound of bricks or a pound of feathers"
+                )
+                # -> {
+                #     'answer': 'They weigh the same',
+                #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
+                # }
+
+        Example: schema=Pydantic class, method="json_mode", include_raw=True:
            ..
code-block:: from langchain_openai import AzureChatOpenAI @@ -830,7 +873,7 @@ class AzureChatOpenAI(BaseChatOpenAI): answer: str justification: str - llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo", temperature=0) + llm = AzureChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) structured_llm = llm.with_structured_output( AnswerWithJustification, method="json_mode", @@ -848,7 +891,7 @@ class AzureChatOpenAI(BaseChatOpenAI): # 'parsing_error': None # } - Example: JSON mode, no schema (schema=None, method="json_mode", include_raw=True): + Example: schema=None, method="json_mode", include_raw=True: .. code-block:: structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) @@ -866,8 +909,6 @@ class AzureChatOpenAI(BaseChatOpenAI): # }, # 'parsing_error': None # } - - """ # noqa: E501 if kwargs: raise ValueError(f"Received unsupported arguments {kwargs}") diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 31c59cd651e..36fdf1d776d 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -285,7 +285,7 @@ class _FunctionCall(TypedDict): _BM = TypeVar("_BM", bound=BaseModel) -_DictOrPydanticClass = Union[Dict[str, Any], Type[_BM]] +_DictOrPydanticClass = Union[Dict[str, Any], Type[_BM], Type] _DictOrPydantic = Union[Dict, _BM] @@ -947,7 +947,7 @@ class BaseChatOpenAI(BaseChatModel): def bind_tools( self, - tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], + tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]], *, tool_choice: Optional[ Union[dict, str, Literal["auto", "none", "required", "any"], bool] @@ -960,23 +960,19 @@ class BaseChatOpenAI(BaseChatModel): Args: tools: A list of tool definitions to bind to this chat model. - Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic - models, callables, and BaseTools will be automatically converted to - their schema dictionary representation. + Supports any tool definition handled by + :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`. tool_choice: Which tool to require the model to call. Options are: - name of the tool (str): calls corresponding tool; - "auto": automatically selects a tool (including no tool); - "none": does not call a tool; - "any" or "required": force at least one tool to be called; - True: forces tool call (requires `tools` be length 1); - False: no effect; - - or a dict of the form: - {"type": "function", "function": {"name": <>}}. - **kwargs: Any additional parameters to pass to the - :class:`~langchain.runnable.Runnable` constructor. - """ + - str of the form ``"<>"``: calls <> tool. + - ``"auto"``: automatically selects a tool (including no tool). + - ``"none"``: does not call a tool. + - ``"any"`` or ``"required"`` or ``True``: force at least one tool to be called. + - dict of the form ``{"type": "function", "function": {"name": <>}}``: calls <> tool. + - ``False`` or ``None``: no effect, default OpenAI behavior. + kwargs: Any additional parameters are passed directly to + ``self.bind(**kwargs)``. + """ # noqa: E501 formatted_tools = [convert_to_openai_tool(tool) for tool in tools] if tool_choice: @@ -1046,20 +1042,32 @@ class BaseChatOpenAI(BaseChatModel): """Model wrapper that returns outputs formatted to match the given schema. Args: - schema: The output schema as a dict or a Pydantic class. 
If a Pydantic class
-                then the model output will be an object of that class. If a dict then
-                the model output will be a dict. With a Pydantic class the returned
-                attributes will be validated, whereas with a dict they will not be. If
-                `method` is "function_calling" and `schema` is a dict, then the dict
-                must match the OpenAI function-calling spec or be a valid JSON schema
-                with top level 'title' and 'description' keys specified.
-            method: The method for steering model generation, either "function_calling"
+            schema:
+                The output schema. Can be passed in as:
+                    - an OpenAI function/tool schema,
+                    - a JSON Schema,
+                    - a TypedDict class (support added in 0.1.20),
+                    - or a Pydantic class.
+                If ``schema`` is a Pydantic class then the model output will be a
+                Pydantic instance of that class, and the model-generated fields will be
+                validated by the Pydantic class. Otherwise the model output will be a
+                dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`
+                for more on how to properly specify types and descriptions of
+                schema fields when specifying a Pydantic or TypedDict class.
+
+                .. versionchanged:: 0.1.20
+
+                        Added support for TypedDict class.
+
+            method:
+                The method for steering model generation, either "function_calling"
                or "json_mode". If "function_calling" then the schema will be converted
                to an OpenAI function and the returned model will make use of the
                function-calling API. If "json_mode" then OpenAI's JSON mode will be
                used. Note that if using "json_mode" then you must include instructions
                for formatting the output into the desired schema into the model call.
-            include_raw: If False then only the parsed structured output is returned. If
+            include_raw:
+                If False then only the parsed structured output is returned. If
                an error occurs during model output parsing it will be raised. If True
                then both the raw model response (a BaseMessage) and the parsed model
                response will be returned. If an error occurs during output parsing it
@@ -1067,33 +1075,37 @@ class BaseChatOpenAI(BaseChatModel):
                with keys "raw", "parsed", and "parsing_error".

        Returns:
-            A Runnable that takes any ChatModel input and returns as output:
+            A Runnable that takes the same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

-            If include_raw is True then a dict with keys:
-                raw: BaseMessage
-                parsed: Optional[_DictOrPydantic]
-                parsing_error: Optional[BaseException]
+            If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs
+            an instance of ``schema`` (i.e., a Pydantic object).

-            If include_raw is False then just _DictOrPydantic is returned,
-            where _DictOrPydantic depends on the schema:
+            Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

-            If schema is a Pydantic class then _DictOrPydantic is the Pydantic
-            class.
+            If ``include_raw`` is True, then Runnable outputs a dict with keys:
+                - ``"raw"``: BaseMessage
+                - ``"parsed"``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+                - ``"parsing_error"``: Optional[BaseException]

-            If schema is a dict then _DictOrPydantic is a dict.
-
-        Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False):
+        Example: schema=Pydantic class, method="function_calling", include_raw=False:
            ..
code-block:: python

+                from typing import Optional
+
                from langchain_openai import ChatOpenAI
-                from langchain_core.pydantic_v1 import BaseModel
+                from langchain_core.pydantic_v1 import BaseModel, Field

                class AnswerWithJustification(BaseModel):
                    '''An answer to the user question along with justification for the answer.'''

                    answer: str
-                    justification: str
+                    # If we provide default values and/or descriptions for fields, these will be passed
+                    # to the model. This is an important part of improving a model's ability to
+                    # correctly return structured outputs.
+                    justification: Optional[str] = Field(
+                        default=None, description="A justification for the answer."
+                    )

                llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
@@ -1108,7 +1120,7 @@ class BaseChatOpenAI(BaseChatModel):
                #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
                # )

-        Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True):
+        Example: schema=Pydantic class, method="function_calling", include_raw=True:
            .. code-block:: python

                from langchain_openai import ChatOpenAI
@@ -1136,24 +1148,27 @@ class BaseChatOpenAI(BaseChatModel):
                #     'parsing_error': None
                # }

-        Example: Function-calling, dict schema (method="function_calling", include_raw=False):
+        Example: schema=TypedDict class, method="function_calling", include_raw=False:
            .. code-block:: python

+                # IMPORTANT: If you are using Python <=3.8, you need to import Annotated
+                # from typing_extensions, not from typing.
+                from typing import Optional
+                from typing_extensions import Annotated, TypedDict
+
                from langchain_openai import ChatOpenAI
-                from langchain_core.pydantic_v1 import BaseModel
-                from langchain_core.utils.function_calling import convert_to_openai_tool

-                class AnswerWithJustification(BaseModel):
+                class AnswerWithJustification(TypedDict):
                    '''An answer to the user question along with justification for the answer.'''

                    answer: str
-                    justification: str
+                    justification: Annotated[
+                        Optional[str], None, "A justification for the answer."
+                    ]

-                dict_schema = convert_to_openai_tool(AnswerWithJustification)
                llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
-                structured_llm = llm.with_structured_output(dict_schema)
+                structured_llm = llm.with_structured_output(AnswerWithJustification)

                structured_llm.invoke(
                    "What weighs more a pound of bricks or a pound of feathers"
@@ -1163,7 +1178,36 @@ class BaseChatOpenAI(BaseChatModel):
                #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                # }

-        Example: JSON mode, Pydantic schema (method="json_mode", include_raw=True):
+        Example: schema=OpenAI function schema, method="function_calling", include_raw=False:
+            ..
code-block:: python + + from langchain_openai import ChatOpenAI + + oai_schema = { + 'name': 'AnswerWithJustification', + 'description': 'An answer to the user question along with justification for the answer.', + 'parameters': { + 'type': 'object', + 'properties': { + 'answer': {'type': 'string'}, + 'justification': {'description': 'A justification for the answer.', 'type': 'string'} + }, + 'required': ['answer'] + } + } + + llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0) + structured_llm = llm.with_structured_output(oai_schema) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'answer': 'They weigh the same', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' + # } + + Example: schema=Pydantic class, method="json_mode", include_raw=True: .. code-block:: from langchain_openai import ChatOpenAI @@ -1191,7 +1235,7 @@ class BaseChatOpenAI(BaseChatModel): # 'parsing_error': None # } - Example: JSON mode, no schema (schema=None, method="json_mode", include_raw=True): + Example: schema=None, method="json_mode", include_raw=True: .. code-block:: structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) @@ -1209,8 +1253,6 @@ class BaseChatOpenAI(BaseChatModel): # }, # 'parsing_error': None # } - - """ # noqa: E501 if kwargs: raise ValueError(f"Received unsupported arguments {kwargs}")
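The rewritten ``tool_choice`` options in ``BaseChatOpenAI.bind_tools`` are documented above but never exercised anywhere in this diff. A minimal usage sketch follows, under stated assumptions: the ``GetWeather`` schema is hypothetical, and only ``langchain-openai`` plus the option list quoted in the docstring are assumed.

```python
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI


class GetWeather(BaseModel):
    """Get the current weather in a given location."""

    location: str = Field(..., description="City and state, e.g. San Francisco, CA")


llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

# str of the form "<tool name>": force a call to that specific tool.
forced = llm.bind_tools([GetWeather], tool_choice="GetWeather")

# Equivalent dict form, per the docstring above.
forced = llm.bind_tools(
    [GetWeather],
    tool_choice={"type": "function", "function": {"name": "GetWeather"}},
)

# "any" / "required" / True: require at least one tool call, model picks which.
required = llm.bind_tools([GetWeather], tool_choice="required")

ai_msg = forced.invoke("What's the weather in Boston?")
ai_msg.tool_calls
# -> e.g. [{'name': 'GetWeather', 'args': {'location': 'Boston, MA'}, 'id': '...'}]
```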