mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 15:43:54 +00:00
anthropic[patch]: allow structured output when thinking is enabled (#30047)
Structured output will currently always raise a BadRequestError when Claude 3.7 Sonnet's `thinking` is enabled, because we rely on forced tool use for structured output and this feature is not supported when `thinking` is enabled. Here we: - Emit a warning if `with_structured_output` is called when `thinking` is enabled. - Raise `OutputParserException` if no tool calls are generated. This is arguably preferable to raising an error in all cases. ```python from langchain_anthropic import ChatAnthropic from pydantic import BaseModel class Person(BaseModel): name: str age: int llm = ChatAnthropic( model="claude-3-7-sonnet-latest", max_tokens=5_000, thinking={"type": "enabled", "budget_tokens": 2_000}, ) structured_llm = llm.with_structured_output(Person) # <-- this generates a warning ``` ```python structured_llm.invoke("Alice is 30.") # <-- works ``` ```python structured_llm.invoke("Hello!") # <-- raises OutputParserException ```
This commit is contained in:
parent
f8ed5007ea
commit
3b066dc005
@ -26,6 +26,7 @@ from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForLLMRun,
|
||||
CallbackManagerForLLMRun,
|
||||
)
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.language_models import LanguageModelInput
|
||||
from langchain_core.language_models.chat_models import (
|
||||
BaseChatModel,
|
||||
@ -83,6 +84,15 @@ _message_type_lookups = {
|
||||
}
|
||||
|
||||
|
||||
class AnthropicTool(TypedDict):
    """Anthropic tool definition.

    Mirrors the JSON object shape the Anthropic Messages API expects for a
    tool specification.
    """

    # Name the model uses to reference and invoke the tool.
    name: str
    # Natural-language description of what the tool does, shown to the model.
    description: str
    # JSON schema describing the tool's expected input arguments.
    input_schema: Dict[str, Any]
    # Optional prompt-caching directive; omitted when caching is not requested.
    cache_control: NotRequired[Dict[str, str]]
|
||||
|
||||
|
||||
def _format_image(image_url: str) -> Dict:
|
||||
"""
|
||||
Formats an image of format data:image/jpeg;base64,{b64_string}
|
||||
@ -954,6 +964,31 @@ class ChatAnthropic(BaseChatModel):
|
||||
data = await self._async_client.messages.create(**payload)
|
||||
return self._format_output(data, **kwargs)
|
||||
|
||||
def _get_llm_for_structured_output_when_thinking_is_enabled(
    self,
    schema: Union[Dict, type],
    formatted_tool: AnthropicTool,
) -> Runnable[LanguageModelInput, BaseMessage]:
    """Build a structured-output runnable for when ``thinking`` is enabled.

    Anthropic structured output normally forces tool use via ``tool_choice``,
    which is not supported alongside extended thinking. This fallback binds
    the schema as a tool *without* forcing it, warns the caller at bind time,
    and appends a post-step that raises if the model produced no tool calls.

    Args:
        schema: The output schema (dict or class) to bind as a tool.
        formatted_tool: ``schema`` already converted to Anthropic's tool
            format; recorded in the ``structured_output_format`` metadata.

    Returns:
        A runnable that invokes the model and raises
        ``langchain_core.exceptions.OutputParserException`` when the response
        contains no tool calls.
    """
    thinking_admonition = (
        "Anthropic structured output relies on forced tool calling, "
        "which is not supported when `thinking` is enabled. This method will raise "
        "langchain_core.exceptions.OutputParserException if tool calls are not "
        "generated. Consider disabling `thinking` or adjust your prompt to ensure "
        "the tool is called."
    )
    # Surface the limitation when the runnable is built, not only when a
    # parse later fails.
    warnings.warn(thinking_admonition)
    # Note: no `tool_choice` here — forced tool use is what `thinking`
    # disallows.
    llm = self.bind_tools(
        [schema],
        structured_output_format={"kwargs": {}, "schema": formatted_tool},
    )

    def _raise_if_no_tool_calls(message: AIMessage) -> AIMessage:
        # Without forced tool use the model may answer in plain text;
        # treat that as a structured-output failure.
        if not message.tool_calls:
            raise OutputParserException(thinking_admonition)
        return message

    return llm | _raise_if_no_tool_calls
|
||||
|
||||
def bind_tools(
|
||||
self,
|
||||
tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]],
|
||||
@ -1251,11 +1286,17 @@ class ChatAnthropic(BaseChatModel):
|
||||
""" # noqa: E501
|
||||
formatted_tool = convert_to_anthropic_tool(schema)
|
||||
tool_name = formatted_tool["name"]
|
||||
llm = self.bind_tools(
|
||||
[schema],
|
||||
tool_choice=tool_name,
|
||||
structured_output_format={"kwargs": {}, "schema": formatted_tool},
|
||||
)
|
||||
if self.thinking is not None and self.thinking.get("type") == "enabled":
|
||||
llm = self._get_llm_for_structured_output_when_thinking_is_enabled(
|
||||
schema, formatted_tool
|
||||
)
|
||||
else:
|
||||
llm = self.bind_tools(
|
||||
[schema],
|
||||
tool_choice=tool_name,
|
||||
structured_output_format={"kwargs": {}, "schema": formatted_tool},
|
||||
)
|
||||
|
||||
if isinstance(schema, type) and is_basemodel_subclass(schema):
|
||||
output_parser: OutputParserLike = PydanticToolsParser(
|
||||
tools=[schema], first_tool_only=True
|
||||
@ -1358,15 +1399,6 @@ class ChatAnthropic(BaseChatModel):
|
||||
return response.input_tokens
|
||||
|
||||
|
||||
class AnthropicTool(TypedDict):
    """Anthropic tool definition.

    Mirrors the JSON object shape the Anthropic Messages API expects for a
    tool specification.
    """

    # Name the model uses to reference and invoke the tool.
    name: str
    # Natural-language description of what the tool does, shown to the model.
    description: str
    # JSON schema describing the tool's expected input arguments.
    input_schema: Dict[str, Any]
    # Optional prompt-caching directive; omitted when caching is not requested.
    cache_control: NotRequired[Dict[str, str]]
|
||||
|
||||
|
||||
def convert_to_anthropic_tool(
|
||||
tool: Union[Dict[str, Any], Type, Callable, BaseTool],
|
||||
) -> AnthropicTool:
|
||||
|
@ -6,7 +6,9 @@ from typing import List, Optional
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from anthropic import BadRequestError
|
||||
from langchain_core.callbacks import CallbackManager
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
AIMessageChunk,
|
||||
@ -730,3 +732,39 @@ def test_redacted_thinking() -> None:
|
||||
assert set(block.keys()) == {"type", "data", "index"}
|
||||
assert block["data"] and isinstance(block["data"], str)
|
||||
assert stream_has_reasoning
|
||||
|
||||
|
||||
def test_structured_output_thinking_enabled() -> None:
    """Structured output with `thinking` enabled warns, parses, and raises."""
    chat_model = ChatAnthropic(
        model="claude-3-7-sonnet-latest",
        max_tokens=5_000,
        thinking={"type": "enabled", "budget_tokens": 2_000},
    )
    # Binding structured output while thinking is enabled should warn the
    # caller about the forced-tool-use limitation.
    with pytest.warns(match="structured output"):
        structured_chat_model = chat_model.with_structured_output(GenerateUsername)

    prompt = "Generate a username for Sally with green hair"
    result = structured_chat_model.invoke(prompt)
    assert isinstance(result, GenerateUsername)

    # A prompt that does not elicit a tool call surfaces as a parser error.
    with pytest.raises(OutputParserException):
        structured_chat_model.invoke("Hello")

    # Streaming should yield parsed objects as well.
    for piece in structured_chat_model.stream(prompt):
        assert isinstance(piece, GenerateUsername)
|
||||
|
||||
|
||||
def test_structured_output_thinking_force_tool_use() -> None:
    # Structured output currently relies on forced tool use, which is not supported
    # when `thinking` is enabled. When this test fails, it means that the feature
    # is supported and the workarounds in `with_structured_output` should be removed.
    chat_model = ChatAnthropic(
        model="claude-3-7-sonnet-latest",
        max_tokens=5_000,
        thinking={"type": "enabled", "budget_tokens": 2_000},
    )
    forced_tool_model = chat_model.bind_tools(
        [GenerateUsername],
        tool_choice="GenerateUsername",
    )
    # The API currently rejects forced tool use combined with thinking.
    with pytest.raises(BadRequestError):
        forced_tool_model.invoke("Generate a username for Sally with green hair")
|
||||
|
Loading…
Reference in New Issue
Block a user