partners/ollama: Enabled Token Level Streaming when Using Bind Tools for ChatOllama (#27689)

**Description:** The issue concerns the unexpected behavior observed when
using the bind_tools method in LangChain's ChatOllama. When tools are
not bound, the llm.stream() method works as expected, returning
incremental chunks of content, which is crucial for real-time
applications such as conversational agents and live feedback systems.
However, when bind_tools([]) is used, the streaming behavior changes,
causing the output to be delivered as a single complete response rather
than incrementally. This change negatively impacts the user experience by
breaking the real-time nature of the streaming mechanism.
**Issue:** #26971

---------

Co-authored-by: 4meyDam1e <amey.damle@mail.utoronto.ca>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
Elham Badri 2024-11-15 11:36:27 -05:00 committed by GitHub
parent 776e3271e3
commit d696728278
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -327,7 +327,7 @@ class ChatOllama(BaseChatModel):
"""Base url the model is hosted under.""" """Base url the model is hosted under."""
client_kwargs: Optional[dict] = {} client_kwargs: Optional[dict] = {}
"""Additional kwargs to pass to the httpx Client. """Additional kwargs to pass to the httpx Client.
For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html) For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
""" """
@ -475,26 +475,27 @@ class ChatOllama(BaseChatModel):
params[key] = kwargs[key] params[key] = kwargs[key]
params["options"]["stop"] = stop params["options"]["stop"] = stop
if "tools" in kwargs:
yield await self._async_client.chat( tools = kwargs.get("tools", None)
model=params["model"], stream = tools is None or len(tools) == 0
messages=ollama_messages,
stream=False, chat_params = {
options=Options(**params["options"]), "model": params["model"],
keep_alive=params["keep_alive"], "messages": ollama_messages,
format=params["format"], "stream": stream,
tools=kwargs["tools"], "options": Options(**params["options"]),
) # type:ignore "keep_alive": params["keep_alive"],
else: "format": params["format"],
async for part in await self._async_client.chat( }
model=params["model"],
messages=ollama_messages, if tools is not None:
stream=True, chat_params["tools"] = tools
options=Options(**params["options"]),
keep_alive=params["keep_alive"], if stream:
format=params["format"], async for part in await self._async_client.chat(**chat_params):
): # type:ignore
yield part yield part
else:
yield await self._async_client.chat(**chat_params)
def _create_chat_stream( def _create_chat_stream(
self, self,
@ -513,25 +514,26 @@ class ChatOllama(BaseChatModel):
params[key] = kwargs[key] params[key] = kwargs[key]
params["options"]["stop"] = stop params["options"]["stop"] = stop
if "tools" in kwargs:
yield self._client.chat( tools = kwargs.get("tools", None)
model=params["model"], stream = tools is None or len(tools) == 0
messages=ollama_messages,
stream=False, chat_params = {
options=Options(**params["options"]), "model": params["model"],
keep_alive=params["keep_alive"], "messages": ollama_messages,
format=params["format"], "stream": stream,
tools=kwargs["tools"], "options": Options(**params["options"]),
) "keep_alive": params["keep_alive"],
"format": params["format"],
}
if tools is not None:
chat_params["tools"] = tools
if stream:
yield from self._client.chat(**chat_params)
else: else:
yield from self._client.chat( yield self._client.chat(**chat_params)
model=params["model"],
messages=ollama_messages,
stream=True,
options=Options(**params["options"]),
keep_alive=params["keep_alive"],
format=params["format"],
)
def _chat_stream_with_aggregation( def _chat_stream_with_aggregation(
self, self,