diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py
index aa3fe27dd5b..45f17046145 100644
--- a/libs/core/langchain_core/messages/utils.py
+++ b/libs/core/langchain_core/messages/utils.py
@@ -1191,6 +1191,8 @@ def convert_to_openai_messages(
                             },
                         }
                     )
+                elif block.get("type") == "thinking":
+                    content.append(block)
                 else:
                     err = (
                         f"Unrecognized content block at "
diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py
index 8777b2674c5..3b223b4864f 100644
--- a/libs/core/tests/unit_tests/messages/test_utils.py
+++ b/libs/core/tests/unit_tests/messages/test_utils.py
@@ -832,6 +832,18 @@ def test_convert_to_openai_messages_anthropic() -> None:
     ]
     assert result == expected
 
+    # Test thinking blocks (pass through)
+    thinking_block = {
+        "signature": "abc123",
+        "thinking": "Thinking text.",
+        "type": "thinking",
+    }
+    text_block = {"text": "Response text.", "type": "text"}
+    messages = [AIMessage([thinking_block, text_block])]
+    result = convert_to_openai_messages(messages)
+    expected = [{"role": "assistant", "content": [thinking_block, text_block]}]
+    assert result == expected
+
 
 def test_convert_to_openai_messages_bedrock_converse_image() -> None:
     image_data = create_image_data()
diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
index cde69d25bcc..c61817972f3 100644
--- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
@@ -4,6 +4,7 @@ import json
 from base64 import b64encode
 from typing import List, Optional
 
+import httpx
 import pytest
 import requests
 from anthropic import BadRequestError
@@ -768,3 +769,64 @@ def test_structured_output_thinking_force_tool_use() -> None:
     )
     with pytest.raises(BadRequestError):
         llm.invoke("Generate a username for Sally with green hair")
+
+
+def test_image_tool_calling() -> None:
+    """Test tool calling with image inputs."""
+
+    class color_picker(BaseModel):
+        """Input your fav color and get a random fact about it."""
+
+        fav_color: str
+
+    human_content: List[dict] = [
+        {
+            "type": "text",
+            "text": "what's your favorite color in this image",
+        },
+    ]
+    image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+    image_data = b64encode(httpx.get(image_url).content).decode("utf-8")
+    human_content.append(
+        {
+            "type": "image",
+            "source": {
+                "type": "base64",
+                "media_type": "image/jpeg",
+                "data": image_data,
+            },
+        }
+    )
+    messages = [
+        SystemMessage("you're a good assistant"),
+        HumanMessage(human_content),  # type: ignore[arg-type]
+        AIMessage(
+            [
+                {"type": "text", "text": "Hmm let me think about that"},
+                {
+                    "type": "tool_use",
+                    "input": {"fav_color": "green"},
+                    "id": "foo",
+                    "name": "color_picker",
+                },
+            ]
+        ),
+        HumanMessage(
+            [
+                {
+                    "type": "tool_result",
+                    "tool_use_id": "foo",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "green is a great pick! that's my sister's favorite color",  # noqa: E501
+                        }
+                    ],
+                    "is_error": False,
+                },
+                {"type": "text", "text": "what's my sister's favorite color"},
+            ]
+        ),
+    ]
+    llm = ChatAnthropic(model="claude-3-5-sonnet-latest")
+    llm.bind_tools([color_picker]).invoke(messages)
diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index e40c5bf98d1..7cc4b2d9cf3 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -186,15 +186,38 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
 def _format_message_content(content: Any) -> Any:
     """Format message content."""
     if content and isinstance(content, list):
-        # Remove unexpected block types
         formatted_content = []
         for block in content:
+            # Remove unexpected block types
             if (
                 isinstance(block, dict)
                 and "type" in block
-                and block["type"] == "tool_use"
+                and block["type"] in ("tool_use", "thinking")
             ):
                 continue
+            # Anthropic image blocks
+            elif (
+                isinstance(block, dict)
+                and block.get("type") == "image"
+                and (source := block.get("source"))
+                and isinstance(source, dict)
+            ):
+                if source.get("type") == "base64" and (
+                    (media_type := source.get("media_type"))
+                    and (data := source.get("data"))
+                ):
+                    formatted_content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": f"data:{media_type};base64,{data}"},
+                        }
+                    )
+                elif source.get("type") == "url" and (url := source.get("url")):
+                    formatted_content.append(
+                        {"type": "image_url", "image_url": {"url": url}}
+                    )
+                else:
+                    continue
             else:
                 formatted_content.append(block)
     else:
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
index f131dbaa2bb..53528808c2a 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
@@ -29,6 +29,10 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
     def supports_json_mode(self) -> bool:
         return True
 
+    @property
+    def supports_anthropic_inputs(self) -> bool:
+        return True
+
     @property
     def supported_usage_metadata_details(
         self,
diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
index abbe9fdf27a..533abc2da9d 100644
--- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
+++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
@@ -1960,7 +1960,7 @@ class ChatModelIntegrationTests(ChatModelTests):
            set the ``supports_anthropic_inputs`` property to False.
        """  # noqa: E501
        if not self.supports_anthropic_inputs:
-            return
+            pytest.skip("Model does not explicitly support Anthropic inputs.")
 
        class color_picker(BaseModelV1):
            """Input your fav color and get a random fact about it."""
@@ -1998,26 +1998,55 @@ class ChatModelIntegrationTests(ChatModelTests):
                         "id": "foo",
                         "name": "color_picker",
                     },
+                ],
+                tool_calls=[
+                    {
+                        "name": "color_picker",
+                        "args": {"fav_color": "green"},
+                        "id": "foo",
+                        "type": "tool_call",
+                    }
+                ],
+            ),
+            ToolMessage("That's a great pick!", tool_call_id="foo"),
+        ]
+        response = model.bind_tools([color_picker]).invoke(messages)
+        assert isinstance(response, AIMessage)
+
+        # Test thinking blocks
+        messages = [
+            HumanMessage(
+                [
+                    {
+                        "type": "text",
+                        "text": "Hello",
+                    },
+                ]
+            ),
+            AIMessage(
+                [
+                    {
+                        "type": "thinking",
+                        "thinking": "I'm thinking...",
+                        "signature": "abc123",
+                    },
+                    {
+                        "type": "text",
+                        "text": "Hello, how are you?",
+                    },
                 ]
             ),
             HumanMessage(
                 [
                     {
-                        "type": "tool_result",
-                        "tool_use_id": "foo",
-                        "content": [
-                            {
-                                "type": "text",
-                                "text": "green is a great pick! that's my sister's favorite color",  # noqa: E501
-                            }
-                        ],
-                        "is_error": False,
+                        "type": "text",
+                        "text": "Well, thanks.",
                     },
-                    {"type": "text", "text": "what's my sister's favorite color"},
                 ]
             ),
         ]
-        model.bind_tools([color_picker]).invoke(messages)
+        response = model.invoke(messages)
+        assert isinstance(response, AIMessage)
 
     def test_tool_message_error_status(
         self, model: BaseChatModel, my_adder_tool: BaseTool