mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 15:43:54 +00:00
partners/ollama: Enabled Token Level Streaming when Using Bind Tools for ChatOllama (#27689)
**Description:** This change addresses unexpected behavior observed when using the `bind_tools` method of LangChain's `ChatOllama`. When no tools are bound, the `llm.stream()` method works as expected, returning incremental chunks of content, which is crucial for real-time applications such as conversational agents and live feedback systems. However, when `bind_tools([])` is used, the streaming behavior changes: the output is delivered as a single complete chunk rather than incrementally. This negatively impacts the user experience by breaking the real-time nature of the streaming mechanism. **Issue:** #26971 --------- Co-authored-by: 4meyDam1e <amey.damle@mail.utoronto.ca> Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
776e3271e3
commit
d696728278
@ -327,7 +327,7 @@ class ChatOllama(BaseChatModel):
|
||||
"""Base url the model is hosted under."""
|
||||
|
||||
client_kwargs: Optional[dict] = {}
|
||||
"""Additional kwargs to pass to the httpx Client.
|
||||
"""Additional kwargs to pass to the httpx Client.
|
||||
For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
|
||||
"""
|
||||
|
||||
@ -475,26 +475,27 @@ class ChatOllama(BaseChatModel):
|
||||
params[key] = kwargs[key]
|
||||
|
||||
params["options"]["stop"] = stop
|
||||
if "tools" in kwargs:
|
||||
yield await self._async_client.chat(
|
||||
model=params["model"],
|
||||
messages=ollama_messages,
|
||||
stream=False,
|
||||
options=Options(**params["options"]),
|
||||
keep_alive=params["keep_alive"],
|
||||
format=params["format"],
|
||||
tools=kwargs["tools"],
|
||||
) # type:ignore
|
||||
else:
|
||||
async for part in await self._async_client.chat(
|
||||
model=params["model"],
|
||||
messages=ollama_messages,
|
||||
stream=True,
|
||||
options=Options(**params["options"]),
|
||||
keep_alive=params["keep_alive"],
|
||||
format=params["format"],
|
||||
): # type:ignore
|
||||
|
||||
tools = kwargs.get("tools", None)
|
||||
stream = tools is None or len(tools) == 0
|
||||
|
||||
chat_params = {
|
||||
"model": params["model"],
|
||||
"messages": ollama_messages,
|
||||
"stream": stream,
|
||||
"options": Options(**params["options"]),
|
||||
"keep_alive": params["keep_alive"],
|
||||
"format": params["format"],
|
||||
}
|
||||
|
||||
if tools is not None:
|
||||
chat_params["tools"] = tools
|
||||
|
||||
if stream:
|
||||
async for part in await self._async_client.chat(**chat_params):
|
||||
yield part
|
||||
else:
|
||||
yield await self._async_client.chat(**chat_params)
|
||||
|
||||
def _create_chat_stream(
|
||||
self,
|
||||
@ -513,25 +514,26 @@ class ChatOllama(BaseChatModel):
|
||||
params[key] = kwargs[key]
|
||||
|
||||
params["options"]["stop"] = stop
|
||||
if "tools" in kwargs:
|
||||
yield self._client.chat(
|
||||
model=params["model"],
|
||||
messages=ollama_messages,
|
||||
stream=False,
|
||||
options=Options(**params["options"]),
|
||||
keep_alive=params["keep_alive"],
|
||||
format=params["format"],
|
||||
tools=kwargs["tools"],
|
||||
)
|
||||
|
||||
tools = kwargs.get("tools", None)
|
||||
stream = tools is None or len(tools) == 0
|
||||
|
||||
chat_params = {
|
||||
"model": params["model"],
|
||||
"messages": ollama_messages,
|
||||
"stream": stream,
|
||||
"options": Options(**params["options"]),
|
||||
"keep_alive": params["keep_alive"],
|
||||
"format": params["format"],
|
||||
}
|
||||
|
||||
if tools is not None:
|
||||
chat_params["tools"] = tools
|
||||
|
||||
if stream:
|
||||
yield from self._client.chat(**chat_params)
|
||||
else:
|
||||
yield from self._client.chat(
|
||||
model=params["model"],
|
||||
messages=ollama_messages,
|
||||
stream=True,
|
||||
options=Options(**params["options"]),
|
||||
keep_alive=params["keep_alive"],
|
||||
format=params["format"],
|
||||
)
|
||||
yield self._client.chat(**chat_params)
|
||||
|
||||
def _chat_stream_with_aggregation(
|
||||
self,
|
||||
|
Loading…
Reference in New Issue
Block a user