partners/ollama: Enabled Token-Level Streaming When Using bind_tools for ChatOllama (#27689)

**Description:** This fixes unexpected behavior observed when using the
`bind_tools` method of LangChain's ChatOllama. When no tools are bound,
`llm.stream()` works as expected, returning incremental chunks of
content, which is crucial for real-time applications such as
conversational agents and live feedback systems. However, once
`bind_tools([])` is called, streaming silently degrades: the output is
delivered as a single complete chunk rather than incrementally. This
breaks the real-time nature of the streaming mechanism and degrades the
user experience.
**Issue:** #26971
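
A minimal reproduction sketch of the reported behavior (this assumes a locally running Ollama server; the model name `llama3.1` is a placeholder):

```python
from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.1")  # placeholder; any local Ollama model works

# Without bound tools, chunks arrive incrementally as they are generated.
for chunk in llm.stream("Tell me a short story"):
    print(chunk.content, end="", flush=True)

# Before this fix, binding tools (even an empty list) silently forced
# stream=False on the underlying client call, so the entire response
# arrived as one chunk instead of token by token.
llm_with_tools = llm.bind_tools([])
for chunk in llm_with_tools.stream("Tell me a short story"):
    print(chunk.content, end="", flush=True)
```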

---------

Co-authored-by: 4meyDam1e <amey.damle@mail.utoronto.ca>
Co-authored-by: Chester Curme <chester.curme@gmail.com>

```diff
@@ -327,7 +327,7 @@ class ChatOllama(BaseChatModel):
     """Base url the model is hosted under."""
 
     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx Client.
+    """Additional kwargs to pass to the httpx Client.
     For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
     """
```
```diff
@@ -475,26 +475,27 @@ class ChatOllama(BaseChatModel):
                 params[key] = kwargs[key]
         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )  # type:ignore
-        else:
-            async for part in await self._async_client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            ):  # type:ignore
-                yield part
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+        if tools is not None:
+            chat_params["tools"] = tools
+        if stream:
+            async for part in await self._async_client.chat(**chat_params):
+                yield part
+        else:
+            yield await self._async_client.chat(**chat_params)
 
     def _create_chat_stream(
         self,
```
```diff
@@ -513,25 +514,26 @@ class ChatOllama(BaseChatModel):
                 params[key] = kwargs[key]
         params["options"]["stop"] = stop
-        if "tools" in kwargs:
-            yield self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=False,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-                tools=kwargs["tools"],
-            )
-        else:
-            yield from self._client.chat(
-                model=params["model"],
-                messages=ollama_messages,
-                stream=True,
-                options=Options(**params["options"]),
-                keep_alive=params["keep_alive"],
-                format=params["format"],
-            )
+        tools = kwargs.get("tools", None)
+        stream = tools is None or len(tools) == 0
+        chat_params = {
+            "model": params["model"],
+            "messages": ollama_messages,
+            "stream": stream,
+            "options": Options(**params["options"]),
+            "keep_alive": params["keep_alive"],
+            "format": params["format"],
+        }
+        if tools is not None:
+            chat_params["tools"] = tools
+        if stream:
+            yield from self._client.chat(**chat_params)
+        else:
+            yield self._client.chat(**chat_params)
 
     def _chat_stream_with_aggregation(
         self,
```
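
The heart of the change is the streaming decision itself, shared by both the async and sync hunks. Isolated as a sketch (this helper function is illustrative, not part of the diff):

```python
from typing import Any, Optional, Sequence

def should_stream(tools: Optional[Sequence[Any]]) -> bool:
    """Stream token by token unless at least one tool is actually bound."""
    return tools is None or len(tools) == 0

assert should_stream(None)                  # no tools bound: stream
assert should_stream([])                    # bind_tools([]): still stream (the fix for #26971)
assert not should_stream([{"name": "f"}])   # real tools: single non-streaming response
```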