partners/ollama: Enabled Token Level Streaming when Using Bind Tools for ChatOllama (#27689)

**Description:** The issue concerns the unexpected behavior observed when
using the bind_tools method in LangChain's ChatOllama. When tools are
not bound, the llm.stream() method works as expected, returning
incremental chunks of content, which is crucial for real-time
applications such as conversational agents and live feedback systems.
However, when bind_tools([]) is used, the streaming behavior changes,
causing the output to be delivered as a single complete response rather
than incrementally. This change negatively impacts the user experience by
breaking the real-time nature of the streaming mechanism.
**Issue:** #26971

---------

Co-authored-by: 4meyDam1e <amey.damle@mail.utoronto.ca>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
Elham Badri 2024-11-15 11:36:27 -05:00 committed by GitHub
parent 776e3271e3
commit d696728278
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -327,7 +327,7 @@ class ChatOllama(BaseChatModel):
"""Base url the model is hosted under.""" """Base url the model is hosted under."""
client_kwargs: Optional[dict] = {} client_kwargs: Optional[dict] = {}
"""Additional kwargs to pass to the httpx Client. """Additional kwargs to pass to the httpx Client.
For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html) For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
""" """
@ -475,26 +475,27 @@ class ChatOllama(BaseChatModel):
params[key] = kwargs[key] params[key] = kwargs[key]
params["options"]["stop"] = stop params["options"]["stop"] = stop
if "tools" in kwargs:
yield await self._async_client.chat( tools = kwargs.get("tools", None)
model=params["model"], stream = tools is None or len(tools) == 0
messages=ollama_messages,
stream=False, chat_params = {
options=Options(**params["options"]), "model": params["model"],
keep_alive=params["keep_alive"], "messages": ollama_messages,
format=params["format"], "stream": stream,
tools=kwargs["tools"], "options": Options(**params["options"]),
) # type:ignore "keep_alive": params["keep_alive"],
else: "format": params["format"],
async for part in await self._async_client.chat( }
model=params["model"],
messages=ollama_messages, if tools is not None:
stream=True, chat_params["tools"] = tools
options=Options(**params["options"]),
keep_alive=params["keep_alive"], if stream:
format=params["format"], async for part in await self._async_client.chat(**chat_params):
): # type:ignore
yield part yield part
else:
yield await self._async_client.chat(**chat_params)
def _create_chat_stream( def _create_chat_stream(
self, self,
@ -513,25 +514,26 @@ class ChatOllama(BaseChatModel):
params[key] = kwargs[key] params[key] = kwargs[key]
params["options"]["stop"] = stop params["options"]["stop"] = stop
if "tools" in kwargs:
yield self._client.chat( tools = kwargs.get("tools", None)
model=params["model"], stream = tools is None or len(tools) == 0
messages=ollama_messages,
stream=False, chat_params = {
options=Options(**params["options"]), "model": params["model"],
keep_alive=params["keep_alive"], "messages": ollama_messages,
format=params["format"], "stream": stream,
tools=kwargs["tools"], "options": Options(**params["options"]),
) "keep_alive": params["keep_alive"],
"format": params["format"],
}
if tools is not None:
chat_params["tools"] = tools
if stream:
yield from self._client.chat(**chat_params)
else: else:
yield from self._client.chat( yield self._client.chat(**chat_params)
model=params["model"],
messages=ollama_messages,
stream=True,
options=Options(**params["options"]),
keep_alive=params["keep_alive"],
format=params["format"],
)
def _chat_stream_with_aggregation( def _chat_stream_with_aggregation(
self, self,