feat(anthropic): web fetch beta (#32894)

Note: citations are broken until Anthropic fixes its API
This commit is contained in:
Mason Daugherty
2025-09-11 15:14:06 -04:00
committed by GitHub
parent 83d938593b
commit 00e992a780
5 changed files with 1619 additions and 1070 deletions
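For reference, the web fetch tool spec passed to `bind_tools` takes roughly the shape below. This is a minimal sketch based only on the parameters exercised in this PR's tests; the domain values are illustrative, and combining all options at once is not guaranteed to be valid:

```python
# Hypothetical sketch of the web fetch tool spec used with bind_tools.
# Only "type" and "name" are required; the rest are optional knobs
# exercised in this PR's integration test.
web_fetch_tool = {
    "type": "web_fetch_20250910",
    "name": "web_fetch",
    "max_uses": 3,  # Cap server-side fetches per turn
    "citations": {"enabled": True},  # Request inline citations
    "max_content_tokens": 1000,  # Truncate fetched content
    # Only one of allowed_domains / blocked_domains may be set at a time:
    "allowed_domains": ["docs.langchain.com"],
}

# The matching beta flag that must be passed to ChatAnthropic(betas=[...]):
required_beta = "web-fetch-2025-09-10"
```

Note that the beta flag (`web-fetch-2025-09-10`) and the tool type (`web_fetch_20250910`) use different separators; mixing them up is an easy mistake.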

View File

@@ -970,8 +970,8 @@
"source": [
"### In tool results (agentic RAG)\n",
"\n",
":::info Requires ``langchain-anthropic>=0.3.17``\n",
"\n",
":::info\n",
"Requires ``langchain-anthropic>=0.3.17``\n",
":::\n",
"\n",
"Claude supports a [search_result](https://docs.anthropic.com/en/docs/build-with-claude/search-results) content block representing citable results from queries against a knowledge base or other custom source. These content blocks can be passed to Claude both at the top level (as in the example above) and within a tool result. This allows Claude to cite elements of its response using the result of a tool call.\n",
@@ -1290,6 +1290,58 @@
"print(f\"Key Points: {result.key_points}\")"
]
},
{
"cell_type": "markdown",
"id": "c580c20a",
"metadata": {},
"source": [
"### Web fetching\n",
"\n",
"Claude can use a [web fetch tool](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/web-fetch-tool) to retrieve the full content of specified web pages and PDFs and ground its responses in that content with citations."
]
},
{
"cell_type": "markdown",
"id": "5cf6ad08",
"metadata": {},
"source": [
":::info\n",
"The web fetch tool is supported since ``langchain-anthropic>=0.3.20``\n",
":::"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4804be1",
"metadata": {},
"outputs": [],
"source": [
"from langchain_anthropic import ChatAnthropic\n",
"\n",
"llm = ChatAnthropic(\n",
" model=\"claude-3-5-haiku-latest\",\n",
" betas=[\"web-fetch-2025-09-10\"], # Enable web fetch beta\n",
")\n",
"\n",
"tool = {\"type\": \"web_fetch_20250910\", \"name\": \"web_fetch\", \"max_uses\": 3}\n",
"llm_with_tools = llm.bind_tools([tool])\n",
"\n",
"response = llm_with_tools.invoke(\n",
" \"Please analyze the content at https://example.com/article\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "088c41d0",
"metadata": {},
"source": [
":::warning\n",
"Note: you must add the `'web-fetch-2025-09-10'` beta header to use this tool.\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "1478cdc6-2e52-4870-80f9-b4ddf88f2db2",
@@ -1299,14 +1351,14 @@
"\n",
"Claude can use a [code execution tool](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/code-execution-tool) to execute Python code in a sandboxed environment.\n",
"\n",
":::info Code execution is supported since ``langchain-anthropic>=0.3.14``\n",
"\n",
":::info\n",
"Code execution is supported since ``langchain-anthropic>=0.3.14``\n",
":::"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "2ce13632-a2da-439f-a429-f66481501630",
"metadata": {},
"outputs": [],
@@ -1315,7 +1367,7 @@
"\n",
"llm = ChatAnthropic(\n",
" model=\"claude-sonnet-4-20250514\",\n",
" betas=[\"code-execution-2025-05-22\"],\n",
" betas=[\"code-execution-2025-05-22\"], # Enable code execution beta\n",
")\n",
"\n",
"tool = {\"type\": \"code_execution_20250522\", \"name\": \"code_execution\"}\n",
@@ -1326,6 +1378,16 @@
")"
]
},
{
"cell_type": "markdown",
"id": "a6b5e15a",
"metadata": {},
"source": [
":::warning\n",
"Note: you must add the `'code-execution-2025-05-22'` beta header to use this tool.\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "24076f91-3a3d-4e53-9618-429888197061",
@@ -1404,14 +1466,14 @@
"\n",
"Claude can use an [MCP connector tool](https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector) for model-generated calls to remote MCP servers.\n",
"\n",
":::info Remote MCP is supported since ``langchain-anthropic>=0.3.14``\n",
"\n",
":::info\n",
"Remote MCP is supported since ``langchain-anthropic>=0.3.14``\n",
":::"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "22fc4a89-e6d8-4615-96cb-2e117349aebf",
"metadata": {},
"outputs": [],
@@ -1423,17 +1485,17 @@
" \"type\": \"url\",\n",
" \"url\": \"https://mcp.deepwiki.com/mcp\",\n",
" \"name\": \"deepwiki\",\n",
" \"tool_configuration\": { # optional configuration\n",
" \"tool_configuration\": { # Optional configuration\n",
" \"enabled\": True,\n",
" \"allowed_tools\": [\"ask_question\"],\n",
" },\n",
" \"authorization_token\": \"PLACEHOLDER\", # optional authorization\n",
" \"authorization_token\": \"PLACEHOLDER\", # Optional authorization\n",
" }\n",
"]\n",
"\n",
"llm = ChatAnthropic(\n",
" model=\"claude-sonnet-4-20250514\",\n",
" betas=[\"mcp-client-2025-04-04\"],\n",
" betas=[\"mcp-client-2025-04-04\"], # Enable MCP beta\n",
" mcp_servers=mcp_servers,\n",
")\n",
"\n",
@@ -1443,6 +1505,16 @@
")"
]
},
{
"cell_type": "markdown",
"id": "0d6d7197",
"metadata": {},
"source": [
":::warning\n",
"Note: you must add the `'mcp-client-2025-04-04'` beta header to use this tool.\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "2fd5d545-a40d-42b1-ad0c-0a79e2536c9b",

View File

@@ -71,6 +71,10 @@ class AnthropicTool(TypedDict):
def _is_builtin_tool(tool: Any) -> bool:
"""Check if a tool is a built-in Anthropic tool.
https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/overview
"""
if not isinstance(tool, dict):
return False
@@ -83,6 +87,7 @@ def _is_builtin_tool(tool: Any) -> bool:
"computer_",
"bash_",
"web_search_",
"web_fetch_",
"code_execution_",
]
return any(tool_type.startswith(prefix) for prefix in _builtin_tool_prefixes)
@@ -277,16 +282,7 @@ def _format_data_content_block(block: dict) -> dict:
def _format_messages(
messages: Sequence[BaseMessage],
) -> tuple[Union[str, list[dict], None], list[dict]]:
"""Format messages for anthropic."""
"""
[
{
"role": _message_type_lookups[m.type],
"content": [_AnthropicMessageContent(text=m.content).model_dump()],
}
for m in messages
]
"""
"""Format messages for Anthropic's API."""
system: Union[str, list[dict], None] = None
formatted_messages: list[dict] = []
merged_messages = _merge_messages(messages)
@@ -440,6 +436,7 @@ def _format_messages(
"code_execution_tool_result",
"mcp_tool_result",
"web_search_tool_result",
"web_fetch_tool_result",
):
content.append(
{
@@ -452,6 +449,7 @@ def _format_messages(
"tool_use_id",
"is_error", # for mcp_tool_result
"cache_control",
"retrieved_at", # for web_fetch_tool_result
)
},
)
@@ -1137,6 +1135,24 @@ class ChatAnthropic(BaseChatModel):
"How do I update a web app to TypeScript 5.5?"
)
.. dropdown:: Web fetch (beta)
.. code-block:: python
from langchain_anthropic import ChatAnthropic
llm = ChatAnthropic(
model="claude-3-5-haiku-latest",
betas=["web-fetch-2025-09-10"], # Enable web fetch beta
)
tool = {"type": "web_fetch_20250910", "name": "web_fetch", "max_uses": 3}
llm_with_tools = llm.bind_tools([tool])
response = llm_with_tools.invoke(
"Please analyze the content at https://example.com/article"
)
.. dropdown:: Code execution
.. code-block:: python
@@ -2232,6 +2248,7 @@ def _make_message_chunk_from_anthropic_event(
"mcp_tool_result",
"server_tool_use", # Server-side tool usage
"web_search_tool_result", # Built-in web search results
"web_fetch_tool_result", # Built-in web fetch results
)
):
if coerce_content_to_string:

View File

@@ -7,8 +7,8 @@ authors = []
license = { text = "MIT" }
requires-python = ">=3.9"
dependencies = [
"anthropic<1,>=0.64.0",
"langchain-core<1.0.0,>=0.3.74",
"anthropic<1,>=0.67.0",
"langchain-core<1.0.0,>=0.3.76",
"pydantic<3.0.0,>=2.7.4",
]
name = "langchain-anthropic"

View File

@@ -940,6 +940,263 @@ def test_web_search() -> None:
)
# @pytest.mark.vcr
@pytest.mark.xfail(reason="Citations broken in Anthropic API; all other features work")
def test_web_fetch() -> None:
"""Note: this is a beta feature.
TODO: Update to remove beta once it's generally available.
"""
llm = ChatAnthropic(model="claude-3-5-haiku-latest", betas=["web-fetch-2025-09-10"]) # type: ignore[call-arg]
tool = {"type": "web_fetch_20250910", "name": "web_fetch", "max_uses": 1}
llm_with_tools = llm.bind_tools([tool])
input_message = {
"role": "user",
"content": [
{
"type": "text",
"text": "Fetch the content at https://docs.langchain.com and analyze",
},
],
}
response = llm_with_tools.invoke([input_message])
assert all(isinstance(block, dict) for block in response.content)
block_types = {
block["type"] for block in response.content if isinstance(block, dict)
}
# A successful fetch call should include:
# 1. text response from the model (e.g. "I'll fetch that for you")
# 2. server_tool_use block indicating the tool was called (using tool "web_fetch")
# 3. web_fetch_tool_result block with the results of said fetch
assert block_types == {"text", "server_tool_use", "web_fetch_tool_result"}
# Verify web fetch result structure
web_fetch_results = [
block
for block in response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
assert len(web_fetch_results) == 1 # Since max_uses=1
fetch_result = web_fetch_results[0]
assert "content" in fetch_result
assert "url" in fetch_result["content"]
assert "retrieved_at" in fetch_result["content"]
# Fetch with citations enabled
tool_with_citations = tool.copy()
tool_with_citations["citations"] = {"enabled": True}
llm_with_citations = llm.bind_tools([tool_with_citations])
citation_message = {
"role": "user",
"content": (
"Fetch https://docs.langchain.com and provide specific quotes with "
"citations"
),
}
citation_response = llm_with_citations.invoke([citation_message])
citation_results = [
block
for block in citation_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
assert len(citation_results) == 1 # Since max_uses=1
citation_result = citation_results[0]
assert citation_result["content"]["content"]["citations"]["enabled"]
text_blocks = [
block
for block in citation_response.content
if isinstance(block, dict) and block.get("type") == "text"
]
# Check that the response contains actual citations in the content
has_citations = False
for block in text_blocks:
citations = block.get("citations", [])
for citation in citations:
if citation.get("type") and citation.get("start_char_index") is not None:
has_citations = True
break
assert has_citations, (
"Expected inline citation tags in response when citations are enabled for "
"web fetch"
)
# Max content tokens param
tool_with_limit = tool.copy()
tool_with_limit["max_content_tokens"] = 1000
llm_with_limit = llm.bind_tools([tool_with_limit])
limit_response = llm_with_limit.invoke([input_message])
# Response should still work even with content limits
assert any(
block["type"] == "web_fetch_tool_result"
for block in limit_response.content
if isinstance(block, dict)
)
# Domains filtering (note: only one can be set at a time)
tool_with_allowed_domains = tool.copy()
tool_with_allowed_domains["allowed_domains"] = ["docs.langchain.com"]
llm_with_allowed = llm.bind_tools([tool_with_allowed_domains])
allowed_response = llm_with_allowed.invoke([input_message])
assert any(
block["type"] == "web_fetch_tool_result"
for block in allowed_response.content
if isinstance(block, dict)
)
# Test that a disallowed domain doesn't work
tool_with_disallowed_domains = tool.copy()
tool_with_disallowed_domains["allowed_domains"] = [
"example.com"
] # Not docs.langchain.com
llm_with_disallowed = llm.bind_tools([tool_with_disallowed_domains])
disallowed_response = llm_with_disallowed.invoke([input_message])
# We should get an error result since the domain (docs.langchain.com) is not allowed
disallowed_results = [
block
for block in disallowed_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
if disallowed_results:
disallowed_result = disallowed_results[0]
if disallowed_result.get("content", {}).get("type") == "web_fetch_tool_error":
assert disallowed_result["content"]["error_code"] in [
"invalid_url",
"fetch_failed",
]
# Blocked domains filtering
tool_with_blocked_domains = tool.copy()
tool_with_blocked_domains["blocked_domains"] = ["example.com"]
llm_with_blocked = llm.bind_tools([tool_with_blocked_domains])
blocked_response = llm_with_blocked.invoke([input_message])
assert any(
block["type"] == "web_fetch_tool_result"
for block in blocked_response.content
if isinstance(block, dict)
)
# Test fetching from a blocked domain fails
blocked_domain_message = {
"role": "user",
"content": "Fetch https://example.com and analyze",
}
tool_with_blocked_example = tool.copy()
tool_with_blocked_example["blocked_domains"] = ["example.com"]
llm_with_blocked_example = llm.bind_tools([tool_with_blocked_example])
blocked_domain_response = llm_with_blocked_example.invoke([blocked_domain_message])
# Should get an error when trying to access a blocked domain
blocked_domain_results = [
block
for block in blocked_domain_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
if blocked_domain_results:
blocked_result = blocked_domain_results[0]
if blocked_result.get("content", {}).get("type") == "web_fetch_tool_error":
assert blocked_result["content"]["error_code"] in [
"invalid_url",
"fetch_failed",
]
# Max uses parameter - test exceeding the limit
multi_fetch_message = {
"role": "user",
"content": (
"Fetch https://docs.langchain.com and then try to fetch "
"https://python.langchain.com"
),
}
max_uses_response = llm_with_tools.invoke([multi_fetch_message])
# Should contain at least one fetch result and potentially an error for the second
fetch_results = [
block
for block in max_uses_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
] # type: ignore[index]
assert len(fetch_results) >= 1
error_results = [
r
for r in fetch_results
if r.get("content", {}).get("type") == "web_fetch_tool_error"
]
if error_results:
assert any(
r["content"]["error_code"] == "max_uses_exceeded" for r in error_results
)
# Streaming
full: Optional[BaseMessageChunk] = None
for chunk in llm_with_tools.stream([input_message]):
assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert isinstance(full.content, list)
block_types = {block["type"] for block in full.content if isinstance(block, dict)}
assert block_types == {"text", "server_tool_use", "web_fetch_tool_result"}
# Test that URLs from context can be used in follow-up
next_message = {
"role": "user",
"content": "What does the site you just fetched say about models?",
}
follow_up_response = llm_with_tools.invoke(
[input_message, full, next_message],
)
# Should work without issues since URL was already in context
assert isinstance(follow_up_response.content, (list, str))
# Error handling - test with an invalid URL format
error_message = {
"role": "user",
"content": "Try to fetch this invalid URL: not-a-valid-url",
}
error_response = llm_with_tools.invoke([error_message])
# Should handle the error gracefully
assert isinstance(error_response.content, (list, str))
# PDF document fetching
pdf_message = {
"role": "user",
"content": (
"Fetch this PDF: "
"https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf "
"and summarize its content"
),
}
pdf_response = llm_with_tools.invoke([pdf_message])
assert any(
block["type"] == "web_fetch_tool_result"
for block in pdf_response.content
if isinstance(block, dict)
)
# Verify PDF content structure (should have base64 data for PDFs)
pdf_results = [
block
for block in pdf_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
if pdf_results:
pdf_result = pdf_results[0]
content = pdf_result.get("content", {})
if content.get("content", {}).get("source", {}).get("type") == "base64":
assert content["content"]["source"]["media_type"] == "application/pdf"
assert "data" in content["content"]["source"]
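The block-filtering pattern repeated throughout this test could be factored into a small helper. A sketch over a hypothetical response content list (the sample blocks below are illustrative, shaped like those asserted above):

```python
def get_blocks(content: list, block_type: str) -> list:
    """Return all content blocks of the given type (dicts only)."""
    return [
        block
        for block in content
        if isinstance(block, dict) and block.get("type") == block_type
    ]


# Hypothetical response content, mimicking the three block types a
# successful fetch produces: text, server_tool_use, web_fetch_tool_result.
sample_content = [
    {"type": "text", "text": "I'll fetch that for you."},
    {
        "type": "server_tool_use",
        "name": "web_fetch",
        "input": {"url": "https://docs.langchain.com"},
    },
    {
        "type": "web_fetch_tool_result",
        "content": {
            "url": "https://docs.langchain.com",
            "retrieved_at": "2025-09-11T00:00:00Z",
        },
    },
]

fetch_results = get_blocks(sample_content, "web_fetch_tool_result")
```

Centralizing the filter would also make it harder to forget the `isinstance` guard, since string content blocks can appear alongside dicts.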
@pytest.mark.vcr
def test_code_execution() -> None:
"""Note: this is a beta feature.

File diff suppressed because it is too large Load Diff