From 2faed37ff1150458cda3d105993b29958389cd3b Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 8 Dec 2025 15:34:56 -0500 Subject: [PATCH] feat(anthropic): document and test fine grained tool streaming (#34118) https://platform.claude.com/docs/en/agents-and-tools/tool-use/fine-grained-tool-streaming --- .../langchain_anthropic/chat_models.py | 36 ++++++ .../langchain_anthropic/data/_profiles.py | 18 +++ .../integration_tests/test_chat_models.py | 104 ++++++++++++++++++ .../tests/unit_tests/test_chat_models.py | 50 +++++++++ 4 files changed, 208 insertions(+) diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index 6456167528f..4b2e313b381 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -890,6 +890,42 @@ class ChatAnthropic(BaseChatModel): Total tokens: 408 ``` + ???+ example "Fine-grained tool streaming" + + Fine-grained tool streaming enables faster streaming of tool parameters + without buffering or JSON validation, reducing latency when receiving large tool + parameters. + + More info available in the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/fine-grained-tool-streaming) + + ```python hl_lines="5" + from langchain_anthropic import ChatAnthropic + + model = ChatAnthropic( + model="claude-sonnet-4-5", + betas=["fine-grained-tool-streaming-2025-05-14"] + ) + + def write_document(title: str, content: str) -> str: + \"\"\"Write a document with the given title and content.\"\"\" + return f"Document '{title}' written" + + model_with_tools = model.bind_tools([write_document]) + + # Stream tool calls with reduced latency + for chunk in model_with_tools.stream( + "Write a document about the benefits of streaming APIs" + ): + print(chunk) + ``` + + !!! note + + This is a beta feature that may return invalid or partial JSON inputs. 
+ + Implement appropriate error handling for incomplete JSON, especially + when `max_tokens` is reached. + ???+ example "Image input" See the [multimodal guide](https://docs.langchain.com/oss/python/langchain/models#multimodal) diff --git a/libs/partners/anthropic/langchain_anthropic/data/_profiles.py b/libs/partners/anthropic/langchain_anthropic/data/_profiles.py index 5d6f748c023..48509532bc7 100644 --- a/libs/partners/anthropic/langchain_anthropic/data/_profiles.py +++ b/libs/partners/anthropic/langchain_anthropic/data/_profiles.py @@ -152,6 +152,24 @@ _PROFILES: dict[str, dict[str, Any]] = { "image_tool_message": True, "structured_output": False, }, + "claude-opus-4-5-20251101": { + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "image_inputs": True, + "audio_inputs": False, + "video_inputs": False, + "image_outputs": False, + "audio_outputs": False, + "video_outputs": False, + "reasoning_output": True, + "tool_calling": True, + "image_url_inputs": True, + "pdf_inputs": True, + "pdf_tool_message": True, + "image_tool_message": True, + "structured_output": False, + "reasoning_effort_control": True, + }, "claude-sonnet-4-5": { "max_input_tokens": 200000, "max_output_tokens": 64000, diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index c79d7edc327..ed6fa71c9ba 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -2152,3 +2152,107 @@ def test_async_shared_client() -> None: llm = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg] _ = asyncio.run(llm.ainvoke("Hello")) _ = asyncio.run(llm.ainvoke("Hello")) + + +def test_fine_grained_tool_streaming() -> None: + """Test that tool calls stream and aggregate with fine-grained tool streaming. + + Fine-grained tool streaming enables Claude to stream tool parameter values. 
+ + https://platform.claude.com/docs/en/agents-and-tools/tool-use/fine-grained-tool-streaming + """ + llm = ChatAnthropic( + model=MODEL_NAME, # type: ignore[call-arg] + temperature=0, + betas=["fine-grained-tool-streaming-2025-05-14"], + ) + + # Define a tool that requires a longer text parameter + tool_definition = { + "name": "write_document", + "description": "Write a document with the given content", + "input_schema": { + "type": "object", + "properties": { + "title": {"type": "string", "description": "Document title"}, + "content": { + "type": "string", + "description": "The full document content", + }, + }, + "required": ["title", "content"], + }, + } + + llm_with_tools = llm.bind_tools([tool_definition]) + query = ( + "Write a document about the benefits of streaming APIs. " + "Include at least 3 paragraphs." + ) + + # Test streaming with fine-grained tool streaming + first = True + chunks: list[BaseMessage | BaseMessageChunk] = [] + tool_call_chunks = [] + + for chunk in llm_with_tools.stream(query): + chunks.append(chunk) + if first: + gathered = chunk + first = False + else: + gathered = gathered + chunk # type: ignore[assignment] + + # Collect tool call chunks + tool_call_chunks.extend( + [ + block + for block in chunk.content_blocks + if block["type"] == "tool_call_chunk" + ] + ) + + # Verify we got chunks + assert len(chunks) > 1 + + # Verify final message has tool call + assert isinstance(gathered, AIMessageChunk) + assert isinstance(gathered.tool_calls, list) + assert len(gathered.tool_calls) >= 1 + + # Find the write_document tool call + write_doc_call = None + for tool_call in gathered.tool_calls: + if tool_call["name"] == "write_document": + write_doc_call = tool_call + break + + assert write_doc_call is not None, "write_document tool call not found" + assert isinstance(write_doc_call["args"], dict) + assert "title" in write_doc_call["args"] + assert "content" in write_doc_call["args"] + assert ( + len(write_doc_call["args"]["content"]) > 100 + ) 
# Should have substantial content + + # Verify tool_call_chunks were received + # With fine-grained streaming, we should get tool call chunks + assert len(tool_call_chunks) > 0 + + # Verify content_blocks in final message + content_blocks = gathered.content_blocks + assert len(content_blocks) >= 1 + + # Should have at least one tool_call block + tool_call_blocks = [b for b in content_blocks if b["type"] == "tool_call"] + assert len(tool_call_blocks) >= 1 + + write_doc_block = None + for block in tool_call_blocks: + if block["name"] == "write_document": + write_doc_block = block + break + + assert write_doc_block is not None + assert write_doc_block["name"] == "write_document" + assert "args" in write_doc_block diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py index dced9b63b7a..fee0fa177dd 100644 --- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py @@ -1300,6 +1300,56 @@ def test_anthropic_bind_tools_tool_choice() -> None: } +def test_fine_grained_tool_streaming_beta() -> None: + """Test that fine-grained tool streaming beta can be enabled.""" + # Test with betas parameter at initialization + model = ChatAnthropic( + model=MODEL_NAME, betas=["fine-grained-tool-streaming-2025-05-14"] + ) + + # Create a simple tool + def get_weather(city: str) -> str: + """Get the weather for a city.""" + return f"Weather in {city}" + + model_with_tools = model.bind_tools([get_weather]) + payload = model_with_tools._get_request_payload( # type: ignore[attr-defined] + "What's the weather in SF?", + stream=True, + **model_with_tools.kwargs, # type: ignore[attr-defined] + ) + + # Verify beta header is in payload + assert "fine-grained-tool-streaming-2025-05-14" in payload["betas"] + assert payload["stream"] is True + + # Test combining with other betas + model = ChatAnthropic( + model=MODEL_NAME, + 
betas=["context-1m-2025-08-07", "fine-grained-tool-streaming-2025-05-14"], + ) + model_with_tools = model.bind_tools([get_weather]) + payload = model_with_tools._get_request_payload( # type: ignore[attr-defined] + "What's the weather?", + stream=True, + **model_with_tools.kwargs, # type: ignore[attr-defined] + ) + assert set(payload["betas"]) == { + "context-1m-2025-08-07", + "fine-grained-tool-streaming-2025-05-14", + } + + # Test that _create routes to beta client when betas are present + model = ChatAnthropic( + model=MODEL_NAME, betas=["fine-grained-tool-streaming-2025-05-14"] + ) + payload = {"betas": ["fine-grained-tool-streaming-2025-05-14"], "stream": True} + + with patch.object(model._client.beta.messages, "create") as mock_beta_create: + model._create(payload) + mock_beta_create.assert_called_once_with(**payload) + + def test_optional_description() -> None: llm = ChatAnthropic(model=MODEL_NAME)