diff --git a/libs/partners/anthropic/Makefile b/libs/partners/anthropic/Makefile index 6feff86bef5..136c4debc85 100644 --- a/libs/partners/anthropic/Makefile +++ b/libs/partners/anthropic/Makefile @@ -14,7 +14,7 @@ test tests: uv run --group test pytest -vvv --disable-socket --allow-unix-socket $(TEST_FILE) integration_test integration_tests: - uv run --group test --group test_integration pytest -vvv --timeout 30 tests/integration_tests/test_chat_models.py + uv run --group test --group test_integration pytest -vvv --timeout 30 $(TEST_FILE) test_watch: uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE) diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index 9a8ec8ba5e8..af677df668e 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -1,10 +1,132 @@ """Test ChatAnthropic chat model.""" -from langchain_core.messages import AIMessageChunk +import json +from base64 import b64encode +from typing import Optional -from langchain_anthropic import ChatAnthropic +import httpx +import pytest +import requests +from anthropic import BadRequestError +from langchain_core.callbacks import CallbackManager +from langchain_core.exceptions import OutputParserException +from langchain_core.messages import ( + AIMessage, + AIMessageChunk, + BaseMessage, + BaseMessageChunk, + HumanMessage, + SystemMessage, + ToolMessage, +) +from langchain_core.outputs import ChatGeneration, LLMResult +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.tools import tool +from pydantic import BaseModel, Field + +from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages +from tests.unit_tests._utils import FakeCallbackHandler MODEL_NAME = "claude-3-5-haiku-latest" +IMAGE_MODEL_NAME = "claude-3-5-sonnet-latest" + + +def test_stream() -> None: + """Test streaming tokens from Anthropic.""" + llm = ChatAnthropicMessages(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg] + + full: Optional[BaseMessageChunk] = None + chunks_with_input_token_counts = 0 + chunks_with_output_token_counts = 0 + chunks_with_model_name = 0 + for token in llm.stream("I'm Pickle Rick"): + assert isinstance(token.content, str) + full = token if full is None else full + token + assert isinstance(token, AIMessageChunk) + if token.usage_metadata is not None: + if token.usage_metadata.get("input_tokens"): + chunks_with_input_token_counts += 1 + if token.usage_metadata.get("output_tokens"): + chunks_with_output_token_counts += 1 + chunks_with_model_name += int("model_name" in token.response_metadata) + if chunks_with_input_token_counts != 1 or chunks_with_output_token_counts != 1: + raise AssertionError( + "Expected exactly one chunk with input or output token counts. " + "AIMessageChunk aggregation adds counts. Check that " + "this is behaving properly." + ) + assert chunks_with_model_name == 1 + # check token usage is populated + assert isinstance(full, AIMessageChunk) + assert full.usage_metadata is not None + assert full.usage_metadata["input_tokens"] > 0 + assert full.usage_metadata["output_tokens"] > 0 + assert full.usage_metadata["total_tokens"] > 0 + assert ( + full.usage_metadata["input_tokens"] + full.usage_metadata["output_tokens"] + == full.usage_metadata["total_tokens"] + ) + assert "stop_reason" in full.response_metadata + assert "stop_sequence" in full.response_metadata + assert "model_name" in full.response_metadata + + +async def test_astream() -> None: + """Test streaming tokens from Anthropic.""" + llm = ChatAnthropicMessages(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg] + + full: Optional[BaseMessageChunk] = None + chunks_with_input_token_counts = 0 + chunks_with_output_token_counts = 0 + async for token in llm.astream("I'm Pickle Rick"): + assert isinstance(token.content, str) + full = token if full is None else full + token + assert isinstance(token, AIMessageChunk) + if token.usage_metadata is not None: + if token.usage_metadata.get("input_tokens"): + chunks_with_input_token_counts += 1 + if token.usage_metadata.get("output_tokens"): + chunks_with_output_token_counts += 1 + if chunks_with_input_token_counts != 1 or chunks_with_output_token_counts != 1: + raise AssertionError( + "Expected exactly one chunk with input or output token counts. " + "AIMessageChunk aggregation adds counts. Check that " + "this is behaving properly." + ) + # check token usage is populated + assert isinstance(full, AIMessageChunk) + assert full.usage_metadata is not None + assert full.usage_metadata["input_tokens"] > 0 + assert full.usage_metadata["output_tokens"] > 0 + assert full.usage_metadata["total_tokens"] > 0 + assert ( + full.usage_metadata["input_tokens"] + full.usage_metadata["output_tokens"] + == full.usage_metadata["total_tokens"] + ) + assert "stop_reason" in full.response_metadata + assert "stop_sequence" in full.response_metadata + + # Check expected raw API output + async_client = llm._async_client + params: dict = { + "model": MODEL_NAME, + "max_tokens": 1024, + "messages": [{"role": "user", "content": "hi"}], + "temperature": 0.0, + } + stream = await async_client.messages.create(**params, stream=True) + async for event in stream: + if event.type == "message_start": + assert event.message.usage.input_tokens > 1 + # Note: this single output token included in message start event + # does not appear to contribute to overall output token counts. It + # is excluded from the total token count. + assert event.message.usage.output_tokens == 1 + elif event.type == "message_delta": + assert event.usage.output_tokens > 1 + else: + pass + async def test_stream_usage() -> None: model = ChatAnthropic(model_name=MODEL_NAME, stream_usage=False) # type: ignore[call-arg] @@ -32,3 +154,723 @@ async def test_async_stream_twice() -> None: async for token in model.astream("hi", stream_usage=False): assert isinstance(token, AIMessageChunk) assert token.usage_metadata is None + + +async def test_abatch() -> None: + """Test streaming tokens from ChatAnthropicMessages.""" + llm = ChatAnthropicMessages(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg] + + result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token.content, str) + + +async def test_abatch_tags() -> None: + """Test batch tokens from ChatAnthropicMessages.""" + llm = ChatAnthropicMessages(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg] + + result = await llm.abatch( + ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} + ) + for token in result: + assert isinstance(token.content, str) + + +async def test_async_tool_use() -> None: + llm = ChatAnthropic( + model=MODEL_NAME, + ) + + llm_with_tools = llm.bind_tools( + [ + { + "name": "get_weather", + "description": "Get weather report for a city", + "input_schema": { + "type": "object", + "properties": {"location": {"type": "string"}}, + }, + } + ] + ) + response = await llm_with_tools.ainvoke("what's the weather in san francisco, ca") + assert isinstance(response, AIMessage) + assert isinstance(response.content, list) + assert isinstance(response.tool_calls, list) + assert len(response.tool_calls) == 1 + tool_call = response.tool_calls[0] + assert tool_call["name"] == "get_weather" + assert isinstance(tool_call["args"], dict) + assert "location" in tool_call["args"] + + # Test streaming + first = True + chunks = [] # type: ignore + async for chunk in llm_with_tools.astream( + "what's the weather in san francisco, ca" + ): + chunks = chunks + [chunk] + if first: + gathered = chunk + first = False + else: + gathered = gathered + chunk # type: ignore + assert len(chunks) > 1 + assert isinstance(gathered, AIMessageChunk) + assert isinstance(gathered.tool_call_chunks, list) + assert len(gathered.tool_call_chunks) == 1 + tool_call_chunk = gathered.tool_call_chunks[0] + assert tool_call_chunk["name"] == "get_weather" + assert isinstance(tool_call_chunk["args"], str) + assert "location" in json.loads(tool_call_chunk["args"]) + + +def test_batch() -> None: + """Test batch tokens from ChatAnthropicMessages.""" + llm = ChatAnthropicMessages(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg] + + result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token.content, str) + + +async def test_ainvoke() -> None: + """Test invoke tokens from ChatAnthropicMessages.""" + llm = ChatAnthropicMessages(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg] + + result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) + assert isinstance(result.content, str) + assert "model_name" in result.response_metadata + + +def test_invoke() -> None: + """Test invoke tokens from ChatAnthropicMessages.""" + llm = ChatAnthropicMessages(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg] + + result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) + assert isinstance(result.content, str) + + +def test_system_invoke() -> None: + """Test invoke tokens with a system message""" + llm = ChatAnthropicMessages(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg] + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are an expert cartographer. If asked, you are a cartographer. " + "STAY IN CHARACTER", + ), + ("human", "Are you a mathematician?"), + ] + ) + + chain = prompt | llm + + result = chain.invoke({}) + assert isinstance(result.content, str) + + +def test_anthropic_call() -> None: + """Test valid call to anthropic.""" + chat = ChatAnthropic(model=MODEL_NAME) + message = HumanMessage(content="Hello") + response = chat.invoke([message]) + assert isinstance(response, AIMessage) + assert isinstance(response.content, str) + + +def test_anthropic_generate() -> None: + """Test generate method of anthropic.""" + chat = ChatAnthropic(model=MODEL_NAME) + chat_messages: list[list[BaseMessage]] = [ + [HumanMessage(content="How many toes do dogs have?")] + ] + messages_copy = [messages.copy() for messages in chat_messages] + result: LLMResult = chat.generate(chat_messages) + assert isinstance(result, LLMResult) + for response in result.generations[0]: + assert isinstance(response, ChatGeneration) + assert isinstance(response.text, str) + assert response.text == response.message.content + assert chat_messages == messages_copy + + +def test_anthropic_streaming() -> None: + """Test streaming tokens from anthropic.""" + chat = ChatAnthropic(model=MODEL_NAME) + message = HumanMessage(content="Hello") + response = chat.stream([message]) + for token in response: + assert isinstance(token, AIMessageChunk) + assert isinstance(token.content, str) + + +def test_anthropic_streaming_callback() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = ChatAnthropic( + model=MODEL_NAME, + callback_manager=callback_manager, + verbose=True, + ) + message = HumanMessage(content="Write me a sentence with 10 words.") + for token in chat.stream([message]): + assert isinstance(token, AIMessageChunk) + assert isinstance(token.content, str) + assert callback_handler.llm_streams > 1 + + +async def test_anthropic_async_streaming_callback() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = ChatAnthropic( + model=MODEL_NAME, + callback_manager=callback_manager, + verbose=True, + ) + chat_messages: list[BaseMessage] = [ + HumanMessage(content="How many toes do dogs have?") + ] + async for token in chat.astream(chat_messages): + assert isinstance(token, AIMessageChunk) + assert isinstance(token.content, str) + assert callback_handler.llm_streams > 1 + + +def test_anthropic_multimodal() -> None: + """Test that multimodal inputs are handled correctly.""" + chat = ChatAnthropic(model=IMAGE_MODEL_NAME) + messages: list[BaseMessage] = [ + HumanMessage( + content=[ + { + "type": "image_url", + "image_url": { + # langchain logo + "url": "", # noqa: E501 + }, + }, + {"type": "text", "text": "What is this a logo for?"}, + ] + ) + ] + response = chat.invoke(messages) + assert isinstance(response, AIMessage) + assert isinstance(response.content, str) + num_tokens = chat.get_num_tokens_from_messages(messages) + assert num_tokens > 0 + + +def test_streaming() -> None: + """Test streaming tokens from Anthropic.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + + llm = ChatAnthropicMessages( # type: ignore[call-arg, call-arg] + model_name=MODEL_NAME, streaming=True, callback_manager=callback_manager + ) + + response = llm.generate([[HumanMessage(content="I'm Pickle Rick")]]) + assert callback_handler.llm_streams > 0 + assert isinstance(response, LLMResult) + + +async def test_astreaming() -> None: + """Test streaming tokens from Anthropic.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + + llm = ChatAnthropicMessages( # type: ignore[call-arg, call-arg] + model_name=MODEL_NAME, streaming=True, callback_manager=callback_manager + ) + + response = await llm.agenerate([[HumanMessage(content="I'm Pickle Rick")]]) + assert callback_handler.llm_streams > 0 + assert isinstance(response, LLMResult) + + +def test_tool_use() -> None: + llm = ChatAnthropic( + model="claude-3-7-sonnet-20250219", + temperature=0, + ) + tool_definition = { + "name": "get_weather", + "description": "Get weather report for a city", + "input_schema": { + "type": "object", + "properties": {"location": {"type": "string"}}, + }, + } + llm_with_tools = llm.bind_tools([tool_definition]) + query = "how are you? what's the weather in san francisco, ca" + response = llm_with_tools.invoke(query) + assert isinstance(response, AIMessage) + assert isinstance(response.content, list) + assert isinstance(response.tool_calls, list) + assert len(response.tool_calls) == 1 + tool_call = response.tool_calls[0] + assert tool_call["name"] == "get_weather" + assert isinstance(tool_call["args"], dict) + assert "location" in tool_call["args"] + + # Test streaming + llm = ChatAnthropic( + model="claude-3-7-sonnet-20250219", + temperature=0, + # Add extra headers to also test token-efficient tools + model_kwargs={ + "extra_headers": {"anthropic-beta": "token-efficient-tools-2025-02-19"} + }, + ) + llm_with_tools = llm.bind_tools([tool_definition]) + first = True + chunks = [] # type: ignore + for chunk in llm_with_tools.stream(query): + chunks = chunks + [chunk] + if first: + gathered = chunk + first = False + else: + gathered = gathered + chunk # type: ignore + assert len(chunks) > 1 + assert isinstance(gathered.content, list) + assert len(gathered.content) == 2 + tool_use_block = None + for content_block in gathered.content: + assert isinstance(content_block, dict) + if content_block["type"] == "tool_use": + tool_use_block = content_block + break + assert tool_use_block is not None + assert tool_use_block["name"] == "get_weather" + assert "location" in json.loads(tool_use_block["partial_json"]) + assert isinstance(gathered, AIMessageChunk) + assert isinstance(gathered.tool_calls, list) + assert len(gathered.tool_calls) == 1 + tool_call = gathered.tool_calls[0] + assert tool_call["name"] == "get_weather" + assert isinstance(tool_call["args"], dict) + assert "location" in tool_call["args"] + assert tool_call["id"] is not None + + # Testing token-efficient tools + # https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use + assert gathered.usage_metadata + assert response.usage_metadata + assert ( + gathered.usage_metadata["total_tokens"] + < response.usage_metadata["total_tokens"] + ) + + # Test passing response back to model + stream = llm_with_tools.stream( + [ + query, + gathered, + ToolMessage(content="sunny and warm", tool_call_id=tool_call["id"]), + ] + ) + chunks = [] # type: ignore + first = True + for chunk in stream: + chunks = chunks + [chunk] + if first: + gathered = chunk + first = False + else: + gathered = gathered + chunk # type: ignore + assert len(chunks) > 1 + + +def test_builtin_tools() -> None: + llm = ChatAnthropic(model="claude-3-7-sonnet-20250219") + tool = {"type": "text_editor_20250124", "name": "str_replace_editor"} + llm_with_tools = llm.bind_tools([tool]) + response = llm_with_tools.invoke( + "There's a syntax error in my primes.py file. Can you help me fix it?" + ) + assert isinstance(response, AIMessage) + assert response.tool_calls + + +class GenerateUsername(BaseModel): + "Get a username based on someone's name and hair color." + + name: str + hair_color: str + + +def test_disable_parallel_tool_calling() -> None: + llm = ChatAnthropic(model="claude-3-5-sonnet-20241022") + llm_with_tools = llm.bind_tools([GenerateUsername], parallel_tool_calls=False) + result = llm_with_tools.invoke( + "Use the GenerateUsername tool to generate user names for:\n\n" + "Sally with green hair\n" + "Bob with blue hair" + ) + assert isinstance(result, AIMessage) + assert len(result.tool_calls) == 1 + + +def test_anthropic_with_empty_text_block() -> None: + """Anthropic SDK can return an empty text block.""" + + @tool + def type_letter(letter: str) -> str: + """Type the given letter.""" + return "OK" + + model = ChatAnthropic(model="claude-3-opus-20240229", temperature=0).bind_tools( + [type_letter] + ) + + messages = [ + SystemMessage( + content="Repeat the given string using the provided tools. Do not write " + "anything else or provide any explanations. For example, " + "if the string is 'abc', you must print the " + "letters 'a', 'b', and 'c' one at a time and in that order. " + ), + HumanMessage(content="dog"), + AIMessage( + content=[ + {"text": "", "type": "text"}, + { + "id": "toolu_01V6d6W32QGGSmQm4BT98EKk", + "input": {"letter": "d"}, + "name": "type_letter", + "type": "tool_use", + }, + ], + tool_calls=[ + { + "name": "type_letter", + "args": {"letter": "d"}, + "id": "toolu_01V6d6W32QGGSmQm4BT98EKk", + "type": "tool_call", + }, + ], + ), + ToolMessage(content="OK", tool_call_id="toolu_01V6d6W32QGGSmQm4BT98EKk"), + ] + + model.invoke(messages) + + +def test_with_structured_output() -> None: + llm = ChatAnthropic( + model="claude-3-opus-20240229", + ) + + structured_llm = llm.with_structured_output( + { + "name": "get_weather", + "description": "Get weather report for a city", + "input_schema": { + "type": "object", + "properties": {"location": {"type": "string"}}, + }, + } + ) + response = structured_llm.invoke("what's the weather in san francisco, ca") + assert isinstance(response, dict) + assert response["location"] + + +def test_get_num_tokens_from_messages() -> None: + llm = ChatAnthropic(model="claude-3-5-sonnet-20241022") + + # Test simple case + messages = [ + SystemMessage(content="You are a scientist"), + HumanMessage(content="Hello, Claude"), + ] + num_tokens = llm.get_num_tokens_from_messages(messages) + assert num_tokens > 0 + + # Test tool use + @tool(parse_docstring=True) + def get_weather(location: str) -> str: + """Get the current weather in a given location + + Args: + location: The city and state, e.g. San Francisco, CA + """ + return "Sunny" + + messages = [ + HumanMessage(content="What's the weather like in San Francisco?"), + ] + num_tokens = llm.get_num_tokens_from_messages(messages, tools=[get_weather]) + assert num_tokens > 0 + + messages = [ + HumanMessage(content="What's the weather like in San Francisco?"), + AIMessage( + content=[ + {"text": "Let's see.", "type": "text"}, + { + "id": "toolu_01V6d6W32QGGSmQm4BT98EKk", + "input": {"location": "SF"}, + "name": "get_weather", + "type": "tool_use", + }, + ], + tool_calls=[ + { + "name": "get_weather", + "args": {"location": "SF"}, + "id": "toolu_01V6d6W32QGGSmQm4BT98EKk", + "type": "tool_call", + }, + ], + ), + ToolMessage(content="Sunny", tool_call_id="toolu_01V6d6W32QGGSmQm4BT98EKk"), + ] + num_tokens = llm.get_num_tokens_from_messages(messages, tools=[get_weather]) + assert num_tokens > 0 + + +class GetWeather(BaseModel): + """Get the current weather in a given location""" + + location: str = Field(..., description="The city and state, e.g. San Francisco, CA") + + +@pytest.mark.parametrize("tool_choice", ["GetWeather", "auto", "any"]) +def test_anthropic_bind_tools_tool_choice(tool_choice: str) -> None: + chat_model = ChatAnthropic( + model=MODEL_NAME, + ) + chat_model_with_tools = chat_model.bind_tools([GetWeather], tool_choice=tool_choice) + response = chat_model_with_tools.invoke("what's the weather in ny and la") + assert isinstance(response, AIMessage) + + +def test_pdf_document_input() -> None: + url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" + data = b64encode(requests.get(url).content).decode() + + result = ChatAnthropic(model=IMAGE_MODEL_NAME).invoke( + [ + HumanMessage( + [ + "summarize this document", + { + "type": "document", + "source": { + "type": "base64", + "data": data, + "media_type": "application/pdf", + }, + }, + ] + ) + ] + ) + assert isinstance(result, AIMessage) + assert isinstance(result.content, str) + assert len(result.content) > 0 + + +def test_citations() -> None: + llm = ChatAnthropic(model="claude-3-5-haiku-latest") + messages = [ + { + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "content", + "content": [ + {"type": "text", "text": "The grass is green"}, + {"type": "text", "text": "The sky is blue"}, + ], + }, + "citations": {"enabled": True}, + }, + {"type": "text", "text": "What color is the grass and sky?"}, + ], + } + ] + response = llm.invoke(messages) + assert isinstance(response, AIMessage) + assert isinstance(response.content, list) + assert any("citations" in block for block in response.content) + + # Test streaming + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream(messages): + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert isinstance(full.content, list) + assert any("citations" in block for block in full.content) + assert not any("citation" in block for block in full.content) + + +def test_thinking() -> None: + llm = ChatAnthropic( + model="claude-3-7-sonnet-latest", + max_tokens=5_000, + thinking={"type": "enabled", "budget_tokens": 2_000}, + ) + response = llm.invoke("Hello") + assert any("thinking" in block for block in response.content) + for block in response.content: + assert isinstance(block, dict) + if block["type"] == "thinking": + assert set(block.keys()) == {"type", "thinking", "signature"} + assert block["thinking"] and isinstance(block["thinking"], str) + assert block["signature"] and isinstance(block["signature"], str) + + # Test streaming + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream("Hello"): + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert isinstance(full.content, list) + assert any("thinking" in block for block in full.content) + for block in full.content: + assert isinstance(block, dict) + if block["type"] == "thinking": + assert set(block.keys()) == {"type", "thinking", "signature", "index"} + assert block["thinking"] and isinstance(block["thinking"], str) + assert block["signature"] and isinstance(block["signature"], str) + + +@pytest.mark.flaky(retries=3, delay=1) +def test_redacted_thinking() -> None: + llm = ChatAnthropic( + model="claude-3-7-sonnet-latest", + max_tokens=5_000, + thinking={"type": "enabled", "budget_tokens": 2_000}, + ) + query = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB" # noqa: E501 + + response = llm.invoke(query) + has_reasoning = False + for block in response.content: + assert isinstance(block, dict) + if block["type"] == "redacted_thinking": + has_reasoning = True + assert set(block.keys()) == {"type", "data"} + assert block["data"] and isinstance(block["data"], str) + assert has_reasoning + + # Test streaming + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream(query): + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert isinstance(full.content, list) + stream_has_reasoning = False + for block in full.content: + assert isinstance(block, dict) + if block["type"] == "redacted_thinking": + stream_has_reasoning = True + assert set(block.keys()) == {"type", "data", "index"} + assert block["data"] and isinstance(block["data"], str) + assert stream_has_reasoning + + +def test_structured_output_thinking_enabled() -> None: + llm = ChatAnthropic( + model="claude-3-7-sonnet-latest", + max_tokens=5_000, + thinking={"type": "enabled", "budget_tokens": 2_000}, + ) + with pytest.warns(match="structured output"): + structured_llm = llm.with_structured_output(GenerateUsername) + query = "Generate a username for Sally with green hair" + response = structured_llm.invoke(query) + assert isinstance(response, GenerateUsername) + + with pytest.raises(OutputParserException): + structured_llm.invoke("Hello") + + # Test streaming + for chunk in structured_llm.stream(query): + assert isinstance(chunk, GenerateUsername) + + +def test_structured_output_thinking_force_tool_use() -> None: + # Structured output currently relies on forced tool use, which is not supported + # when `thinking` is enabled. When this test fails, it means that the feature + # is supported and the workarounds in `with_structured_output` should be removed. + llm = ChatAnthropic( + model="claude-3-7-sonnet-latest", + max_tokens=5_000, + thinking={"type": "enabled", "budget_tokens": 2_000}, + ).bind_tools( + [GenerateUsername], + tool_choice="GenerateUsername", + ) + with pytest.raises(BadRequestError): + llm.invoke("Generate a username for Sally with green hair") + + +def test_image_tool_calling() -> None: + """Test tool calling with image inputs.""" + + class color_picker(BaseModel): + """Input your fav color and get a random fact about it.""" + + fav_color: str + + human_content: list[dict] = [ + { + "type": "text", + "text": "what's your favorite color in this image", + }, + ] + image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + image_data = b64encode(httpx.get(image_url).content).decode("utf-8") + human_content.append( + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": image_data, + }, + } + ) + messages = [ + SystemMessage("you're a good assistant"), + HumanMessage(human_content), # type: ignore[arg-type] + AIMessage( + [ + {"type": "text", "text": "Hmm let me think about that"}, + { + "type": "tool_use", + "input": {"fav_color": "green"}, + "id": "foo", + "name": "color_picker", + }, + ] + ), + HumanMessage( + [ + { + "type": "tool_result", + "tool_use_id": "foo", + "content": [ + { + "type": "text", + "text": "green is a great pick! that's my sister's favorite color", # noqa: E501 + } + ], + "is_error": False, + }, + {"type": "text", "text": "what's my sister's favorite color"}, + ] + ), + ] + llm = ChatAnthropic(model="claude-3-5-sonnet-latest") + llm.bind_tools([color_picker]).invoke(messages)