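Update tests: move the standard-format cache-control and citations checks from the Anthropic integration tests into `_format_messages` unit tests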

2025-08-15 07:36:08 +00:00 · 2025-04-09 10:10:17 -04:00 · 2025-04-09 10:10:17 -04:00 · 35fbe24532
commit 35fbe24532
parent 0354dec091
4 changed files with 82 additions and 67 deletions
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@ -253,7 +253,7 @@ def _format_data_content_block(block: dict) -> dict:


 def _format_messages(
-    messages: List[BaseMessage],
+    messages: Sequence[BaseMessage],
 ) -> Tuple[Union[str, List[Dict], None], List[Dict]]:
    """Format messages for anthropic."""

--- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
@ -663,34 +663,6 @@ def test_pdf_document_input() -> None:
    assert isinstance(result.content, str)
    assert len(result.content) > 0

-    # Test cache control with standard format
-    result = ChatAnthropic(model=IMAGE_MODEL_NAME).invoke(
-        [
-            HumanMessage(
-                [
-                    {
-                        "type": "text",
-                        "text": "Summarize this document:",
-                    },
-                    {
-                        "type": "file",
-                        "source_type": "base64",
-                        "mime_type": "application/pdf",
-                        "source": data,
-                        "metadata": {"cache_control": {"type": "ephemeral"}},
-                    },
-                ]
-            )
-        ]
-    )
-    assert isinstance(result, AIMessage)
-    assert isinstance(result.content, str)
-    assert len(result.content) > 0
-    assert result.usage_metadata is not None
-    cache_creation = result.usage_metadata["input_token_details"]["cache_creation"]
-    cache_read = result.usage_metadata["input_token_details"]["cache_read"]
-    assert cache_creation > 0 or cache_read > 0
-

 def test_citations() -> None:
    llm = ChatAnthropic(model="claude-3-5-haiku-latest")
@ -727,27 +699,6 @@ def test_citations() -> None:
    assert any("citations" in block for block in full.content)
    assert not any("citation" in block for block in full.content)

-    # Test standard format
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "file",
-                    "source_type": "text",
-                    "source": "The grass is green. The sky is blue.",
-                    "mime_type": "text/plain",
-                    "metadata": {"citations": {"enabled": True}},
-                },
-                {"type": "text", "text": "What color is the grass and sky?"},
-            ],
-        }
-    ]
-    response = llm.invoke(messages)
-    assert isinstance(response, AIMessage)
-    assert isinstance(response.content, list)
-    assert any("citations" in block for block in response.content)
-

 def test_thinking() -> None:
    llm = ChatAnthropic(
--- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
@ -690,6 +690,85 @@ def test__format_messages_with_cache_control() -> None:
    assert expected_system == actual_system
    assert expected_messages == actual_messages

+    # Test standard multi-modal format
+    messages = [
+        HumanMessage(
+            [
+                {
+                    "type": "text",
+                    "text": "Summarize this document:",
+                },
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "mime_type": "application/pdf",
+                    "source": "<base64 data>",
+                    "metadata": {"cache_control": {"type": "ephemeral"}},
+                },
+            ]
+        )
+    ]
+    actual_system, actual_messages = _format_messages(messages)
+    assert actual_system is None
+    expected_messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Summarize this document:",
+                },
+                {
+                    "type": "document",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "application/pdf",
+                        "data": "<base64 data>",
+                    },
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        }
+    ]
+    assert actual_messages == expected_messages
+
+
+def test__format_messages_with_citations() -> None:
+    input_messages = [
+        HumanMessage(
+            content=[
+                {
+                    "type": "file",
+                    "source_type": "text",
+                    "source": "The grass is green. The sky is blue.",
+                    "mime_type": "text/plain",
+                    "metadata": {"citations": {"enabled": True}},
+                },
+                {"type": "text", "text": "What color is the grass and sky?"},
+            ]
+        )
+    ]
+    expected_messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "document",
+                    "source": {
+                        "type": "text",
+                        "media_type": "text/plain",
+                        "data": "The grass is green. The sky is blue.",
+                    },
+                    "citations": {"enabled": True},
+                },
+                {"type": "text", "text": "What color is the grass and sky?"},
+            ],
+        }
+    ]
+    actual_system, actual_messages = _format_messages(input_messages)
+    assert actual_system is None
+    assert actual_messages == expected_messages
+

 def test__format_messages_with_multiple_system() -> None:
    messages = [
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@ -68,6 +68,7 @@ from langchain_core.messages import (
    ToolCall,
    ToolMessage,
    ToolMessageChunk,
+    convert_image_content_block_to_image_url,
    is_data_content_block,
 )
 from langchain_core.messages.ai import (
@ -195,23 +196,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
 def _format_data_content_block(block: dict) -> dict:
    """Format standard data content block to format expected by OpenAI."""
    if block["type"] == "image":
-        if block["source_type"] == "url":
-            formatted_block = {
-                "type": "image_url",
-                "image_url": {"url": block["source"]},
-            }
-        elif block["source_type"] == "base64":
-            formatted_block = {
-                "type": "image_url",
-                "image_url": {
-                    "url": f"data:{block['mime_type']};base64,{block['source']}"
-                },
-            }
-        else:
-            raise ValueError(
-                "OpenAI only supports 'url' and 'base64' source_type for image "
-                "content blocks."
-            )
+        formatted_block = convert_image_content_block_to_image_url(block)  # type: ignore[arg-type]

    elif block["type"] == "file":
        if block["source_type"] == "base64":