diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index f93456d88dc..e9f3b82b894 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -499,6 +499,8 @@ def _format_messages( content.append({**block, "content": tool_content}) elif block["type"] in ( "code_execution_tool_result", + "bash_code_execution_tool_result", + "text_editor_code_execution_tool_result", "mcp_tool_result", "web_search_tool_result", "web_fetch_tool_result", @@ -2543,7 +2545,6 @@ def _make_message_chunk_from_anthropic_event( message_chunk: AIMessageChunk | None = None # Reference: Anthropic SDK streaming implementation # https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/lib/streaming/_messages.py # noqa: E501 - if event.type == "message_start" and stream_usage: # Capture model name, but don't include usage_metadata yet # as it will be properly reported in message_delta with complete info @@ -2560,17 +2561,11 @@ def _make_message_chunk_from_anthropic_event( elif ( event.type == "content_block_start" and event.content_block is not None - and event.content_block.type - in ( - "tool_use", # Standard tool usage - "code_execution_tool_result", # Built-in code execution results - "document", - "redacted_thinking", - "mcp_tool_use", - "mcp_tool_result", - "server_tool_use", # Server-side tool usage - "web_search_tool_result", # Built-in web search results - "web_fetch_tool_result", # Built-in web fetch results, + and ( + "tool_result" in event.content_block.type + or "tool_use" in event.content_block.type + or "document" in event.content_block.type + or "redacted_thinking" in event.content_block.type ) ): if coerce_content_to_string: diff --git a/libs/partners/anthropic/tests/cassettes/test_code_execution.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_code_execution.yaml.gz new file mode 100644 index 00000000000..dd71c3d67ec Binary files /dev/null and b/libs/partners/anthropic/tests/cassettes/test_code_execution.yaml.gz differ diff --git a/libs/partners/anthropic/tests/cassettes/test_code_execution[v0].yaml.gz b/libs/partners/anthropic/tests/cassettes/test_code_execution_old[v0].yaml.gz similarity index 100% rename from libs/partners/anthropic/tests/cassettes/test_code_execution[v0].yaml.gz rename to libs/partners/anthropic/tests/cassettes/test_code_execution_old[v0].yaml.gz diff --git a/libs/partners/anthropic/tests/cassettes/test_code_execution[v1].yaml.gz b/libs/partners/anthropic/tests/cassettes/test_code_execution_old[v1].yaml.gz similarity index 100% rename from libs/partners/anthropic/tests/cassettes/test_code_execution[v1].yaml.gz rename to libs/partners/anthropic/tests/cassettes/test_code_execution_old[v1].yaml.gz diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index f8f5111da39..bcc5d31390d 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -1505,10 +1505,13 @@ def test_web_fetch_v1(output_version: Literal["v0", "v1"]) -> None: @pytest.mark.vcr @pytest.mark.parametrize("output_version", ["v0", "v1"]) -def test_code_execution(output_version: Literal["v0", "v1"]) -> None: - """Note: this is a beta feature. +def test_code_execution_old(output_version: Literal["v0", "v1"]) -> None: + """Note: this tests the `code_execution_20250522` tool, which is now legacy. - TODO: Update to remove beta once generally available. + See the `test_code_execution` test below to test the current + `code_execution_20250825` tool. + + Migration guide: https://docs.claude.com/en/docs/agents-and-tools/tool-use/code-execution-tool#upgrade-to-latest-tool-version """ llm = ChatAnthropic( model=MODEL_NAME, # type: ignore[call-arg] @@ -1562,6 +1565,76 @@ def test_code_execution(output_version: Literal["v0", "v1"]) -> None: ) +@pytest.mark.default_cassette("test_code_execution.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_code_execution(output_version: Literal["v0", "v1"]) -> None: + """Note: this is a beta feature. + + TODO: Update to remove beta once generally available. + """ + llm = ChatAnthropic( + model=MODEL_NAME, # type: ignore[call-arg] + betas=["code-execution-2025-08-25"], + output_version=output_version, + ) + + tool = {"type": "code_execution_20250825", "name": "code_execution"} + llm_with_tools = llm.bind_tools([tool]) + + input_message = { + "role": "user", + "content": [ + { + "type": "text", + "text": ( + "Calculate the mean and standard deviation of " + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]" + ), + }, + ], + } + response = llm_with_tools.invoke([input_message]) + assert all(isinstance(block, dict) for block in response.content) + block_types = {block["type"] for block in response.content} # type: ignore[index] + if output_version == "v0": + assert block_types == { + "text", + "server_tool_use", + "text_editor_code_execution_tool_result", + "bash_code_execution_tool_result", + } + else: + assert block_types == {"text", "server_tool_call", "server_tool_result"} + + # Test streaming + full: BaseMessageChunk | None = None + for chunk in llm_with_tools.stream([input_message]): + assert isinstance(chunk, AIMessageChunk) + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert isinstance(full.content, list) + block_types = {block["type"] for block in full.content} # type: ignore[index] + if output_version == "v0": + assert block_types == { + "text", + "server_tool_use", + "text_editor_code_execution_tool_result", + "bash_code_execution_tool_result", + } + else: + assert block_types == {"text", "server_tool_call", "server_tool_result"} + + # Test we can pass back in + next_message = { + "role": "user", + "content": "Please add more comments to the code.", + } + _ = llm_with_tools.invoke( + [input_message, full, next_message], + ) + + @pytest.mark.default_cassette("test_remote_mcp.yaml.gz") @pytest.mark.vcr @pytest.mark.parametrize("output_version", ["v0", "v1"])