make image inputs compatible with langchain_ollama (#24619)

2025-06-23 07:09:31 +00:00 · 2024-07-26 17:39:57 -07:00 · 2024-07-26 17:39:57 -07:00 · 152427eca1
commit 152427eca1
parent 0535d72927
2 changed files with 33 additions and 8 deletions
--- a/libs/partners/ollama/langchain_ollama/chat_models.py
+++ b/libs/partners/ollama/langchain_ollama/chat_models.py
@ -346,7 +346,7 @@ class ChatOllama(BaseChatModel):
    ) -> Sequence[Message]:
        ollama_messages: List = []
        for message in messages:
-            role = ""
+            role: Literal["user", "assistant", "system", "tool"]
            tool_call_id: Optional[str] = None
            tool_calls: Optional[List[Dict[str, Any]]] = None
            if isinstance(message, HumanMessage):
@ -383,11 +383,13 @@ class ChatOllama(BaseChatModel):
                        image_url = None
                        temp_image_url = content_part.get("image_url")
                        if isinstance(temp_image_url, str):
                            image_url = content_part["image_url"]
                        elif (
                            isinstance(temp_image_url, dict) and "url" in temp_image_url
                        ):
                            image_url = temp_image_url
                        elif (
                            isinstance(temp_image_url, dict)
                            and "url" in temp_image_url
                            and isinstance(temp_image_url["url"], str)
                        ):
                            image_url = temp_image_url["url"]
                        else:
                            raise ValueError(
                                "Only string image_url or dict with string 'url' "
@ -408,15 +410,16 @@ class ChatOllama(BaseChatModel):
                            "Must either have type 'text' or type 'image_url' "
                            "with a string 'image_url' field."
                        )
-            msg = {
+            # Should convert to ollama.Message once role includes tool, and tool_call_id is in Message # noqa: E501
            msg: dict = {
                "role": role,
                "content": content,
                "images": images,
            }
            if tool_calls:
                msg["tool_calls"] = tool_calls  # type: ignore
            if tool_call_id:
                msg["tool_call_id"] = tool_call_id
            if tool_calls:
                msg["tool_calls"] = tool_calls
            ollama_messages.append(msg)
        return ollama_messages
--- a/libs/partners/ollama/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/ollama/tests/integration_tests/test_chat_models.py
@ -2,6 +2,8 @@
 from typing import Type
 import pytest
 from langchain_core.language_models import BaseChatModel
 from langchain_standard_tests.integration_tests import ChatModelIntegrationTests
 from langchain_ollama.chat_models import ChatOllama
@ -15,3 +17,23 @@ class TestChatOllama(ChatModelIntegrationTests):
    @property
    def chat_model_params(self) -> dict:
        """Return the chat-model parameters for this suite.

        The result is a fresh dict with the single key ``model``.
        NOTE(review): presumably consumed by the standard-tests base class
        when it builds the model under test -- confirm against that class.
        """
        params = dict(model="llama3-groq-tool-use")
        return params
    @property
    def supports_image_inputs(self) -> bool:
        """Flag image inputs as supported; always returns ``True``."""
        image_inputs_supported = True
        return image_inputs_supported
    @pytest.mark.xfail(
        reason=(
            "Fails with 'AssertionError'. "
            "Ollama does not support 'tool_choice' yet."
        )
    )
    def test_structured_output(self, model: BaseChatModel) -> None:
        """Run the inherited structured-output test as an expected failure.

        The body only delegates to the parent implementation; the ``xfail``
        marker above records why the test is expected to fail.
        """
        parent_suite = super()
        parent_suite.test_structured_output(model)
    @pytest.mark.xfail(
        reason=(
            "Fails with 'AssertionError'. "
            "Ollama does not support 'tool_choice' yet."
        )
    )
    def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None:
        """Run the inherited pydantic-2/v1 structured-output test as an xfail.

        The body only delegates to the parent implementation; the ``xfail``
        marker above records why the test is expected to fail.
        """
        parent_suite = super()
        parent_suite.test_structured_output_pydantic_2_v1(model)