feat(anthropic): support code_execution_20250825 (#33925)

This commit is contained in:
Mohammad Mohtashim
2025-11-12 20:44:51 +05:00
committed by GitHub
parent 637bb1cbbc
commit 2511c28f92
5 changed files with 83 additions and 15 deletions

View File

@@ -499,6 +499,8 @@ def _format_messages(
content.append({**block, "content": tool_content})
elif block["type"] in (
"code_execution_tool_result",
"bash_code_execution_tool_result",
"text_editor_code_execution_tool_result",
"mcp_tool_result",
"web_search_tool_result",
"web_fetch_tool_result",
@@ -2543,7 +2545,6 @@ def _make_message_chunk_from_anthropic_event(
message_chunk: AIMessageChunk | None = None
# Reference: Anthropic SDK streaming implementation
# https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/lib/streaming/_messages.py # noqa: E501
if event.type == "message_start" and stream_usage:
# Capture model name, but don't include usage_metadata yet
# as it will be properly reported in message_delta with complete info
@@ -2560,17 +2561,11 @@ def _make_message_chunk_from_anthropic_event(
elif (
event.type == "content_block_start"
and event.content_block is not None
and event.content_block.type
in (
"tool_use", # Standard tool usage
"code_execution_tool_result", # Built-in code execution results
"document",
"redacted_thinking",
"mcp_tool_use",
"mcp_tool_result",
"server_tool_use", # Server-side tool usage
"web_search_tool_result", # Built-in web search results
"web_fetch_tool_result", # Built-in web fetch results,
and (
"tool_result" in event.content_block.type
or "tool_use" in event.content_block.type
or "document" in event.content_block.type
or "redacted_thinking" in event.content_block.type
)
):
if coerce_content_to_string:

View File

@@ -1505,10 +1505,13 @@ def test_web_fetch_v1(output_version: Literal["v0", "v1"]) -> None:
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_code_execution(output_version: Literal["v0", "v1"]) -> None:
"""Note: this is a beta feature.
def test_code_execution_old(output_version: Literal["v0", "v1"]) -> None:
"""Note: this tests the `code_execution_20250522` tool, which is now legacy.
TODO: Update to remove beta once generally available.
See the `test_code_execution` test below to test the current
`code_execution_20250825` tool.
Migration guide: https://docs.claude.com/en/docs/agents-and-tools/tool-use/code-execution-tool#upgrade-to-latest-tool-version
"""
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
@@ -1562,6 +1565,76 @@ def test_code_execution(output_version: Literal["v0", "v1"]) -> None:
)
@pytest.mark.default_cassette("test_code_execution.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_code_execution(output_version: Literal["v0", "v1"]) -> None:
    """Exercise the current ``code_execution_20250825`` server-side tool.

    Note: this is a beta feature.
    TODO: Update to remove beta once generally available.

    Runs three round trips against the recorded cassette: a plain
    ``invoke``, a ``stream`` (accumulating chunks), and a follow-up turn
    that passes the streamed assistant message back in.
    """
    # Opt into the 2025-08-25 code-execution beta and the requested
    # content-block output version ("v0" raw blocks vs "v1" normalized).
    llm = ChatAnthropic(
        model=MODEL_NAME,  # type: ignore[call-arg]
        betas=["code-execution-2025-08-25"],
        output_version=output_version,
    )
    tool = {"type": "code_execution_20250825", "name": "code_execution"}
    llm_with_tools = llm.bind_tools([tool])
    input_message = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": (
                    "Calculate the mean and standard deviation of "
                    "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
                ),
            },
        ],
    }
    response = llm_with_tools.invoke([input_message])
    # Every content block should be a dict regardless of output version.
    assert all(isinstance(block, dict) for block in response.content)
    block_types = {block["type"] for block in response.content}  # type: ignore[index]
    if output_version == "v0":
        # v0 surfaces the raw Anthropic block types; the 20250825 tool
        # emits per-sub-tool result blocks (bash / text-editor) alongside
        # the server_tool_use block.
        assert block_types == {
            "text",
            "server_tool_use",
            "text_editor_code_execution_tool_result",
            "bash_code_execution_tool_result",
        }
    else:
        # v1 normalizes server tool activity into generic call/result blocks.
        assert block_types == {"text", "server_tool_call", "server_tool_result"}
    # Test streaming: the accumulated chunks must expose the same block
    # types as the non-streaming response above.
    full: BaseMessageChunk | None = None
    for chunk in llm_with_tools.stream([input_message]):
        assert isinstance(chunk, AIMessageChunk)
        full = chunk if full is None else full + chunk
    assert isinstance(full, AIMessageChunk)
    assert isinstance(full.content, list)
    block_types = {block["type"] for block in full.content}  # type: ignore[index]
    if output_version == "v0":
        assert block_types == {
            "text",
            "server_tool_use",
            "text_editor_code_execution_tool_result",
            "bash_code_execution_tool_result",
        }
    else:
        assert block_types == {"text", "server_tool_call", "server_tool_result"}
    # Test we can pass back in: the streamed assistant turn (including
    # its tool-result blocks) must round-trip through message formatting.
    next_message = {
        "role": "user",
        "content": "Please add more comments to the code.",
    }
    _ = llm_with_tools.invoke(
        [input_message, full, next_message],
    )
@pytest.mark.default_cassette("test_remote_mcp.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])