mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 23:54:14 +00:00
partners/ollama: Enabled Token Level Streaming when Using Bind Tools for ChatOllama (#27689)
**Description:** This change addresses unexpected behavior observed when using the `bind_tools` method of LangChain's `ChatOllama`. When no tools are bound, `llm.stream()` works as expected and returns incremental chunks of content, which is crucial for real-time applications such as conversational agents and live feedback systems. However, when `bind_tools([])` is used, the streaming behavior changes: the output is delivered in full chunks rather than incrementally. This negatively impacts the user experience by breaking the real-time nature of the streaming mechanism.

**Issue:** #26971

---------

Co-authored-by: 4meyDam1e <amey.damle@mail.utoronto.ca>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
776e3271e3
commit
d696728278
@ -327,7 +327,7 @@ class ChatOllama(BaseChatModel):
|
|||||||
"""Base url the model is hosted under."""
|
"""Base url the model is hosted under."""
|
||||||
|
|
||||||
client_kwargs: Optional[dict] = {}
|
client_kwargs: Optional[dict] = {}
|
||||||
"""Additional kwargs to pass to the httpx Client.
|
"""Additional kwargs to pass to the httpx Client.
|
||||||
For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
|
For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -475,26 +475,27 @@ class ChatOllama(BaseChatModel):
|
|||||||
params[key] = kwargs[key]
|
params[key] = kwargs[key]
|
||||||
|
|
||||||
params["options"]["stop"] = stop
|
params["options"]["stop"] = stop
|
||||||
if "tools" in kwargs:
|
|
||||||
yield await self._async_client.chat(
|
tools = kwargs.get("tools", None)
|
||||||
model=params["model"],
|
stream = tools is None or len(tools) == 0
|
||||||
messages=ollama_messages,
|
|
||||||
stream=False,
|
chat_params = {
|
||||||
options=Options(**params["options"]),
|
"model": params["model"],
|
||||||
keep_alive=params["keep_alive"],
|
"messages": ollama_messages,
|
||||||
format=params["format"],
|
"stream": stream,
|
||||||
tools=kwargs["tools"],
|
"options": Options(**params["options"]),
|
||||||
) # type:ignore
|
"keep_alive": params["keep_alive"],
|
||||||
else:
|
"format": params["format"],
|
||||||
async for part in await self._async_client.chat(
|
}
|
||||||
model=params["model"],
|
|
||||||
messages=ollama_messages,
|
if tools is not None:
|
||||||
stream=True,
|
chat_params["tools"] = tools
|
||||||
options=Options(**params["options"]),
|
|
||||||
keep_alive=params["keep_alive"],
|
if stream:
|
||||||
format=params["format"],
|
async for part in await self._async_client.chat(**chat_params):
|
||||||
): # type:ignore
|
|
||||||
yield part
|
yield part
|
||||||
|
else:
|
||||||
|
yield await self._async_client.chat(**chat_params)
|
||||||
|
|
||||||
def _create_chat_stream(
|
def _create_chat_stream(
|
||||||
self,
|
self,
|
||||||
@ -513,25 +514,26 @@ class ChatOllama(BaseChatModel):
|
|||||||
params[key] = kwargs[key]
|
params[key] = kwargs[key]
|
||||||
|
|
||||||
params["options"]["stop"] = stop
|
params["options"]["stop"] = stop
|
||||||
if "tools" in kwargs:
|
|
||||||
yield self._client.chat(
|
tools = kwargs.get("tools", None)
|
||||||
model=params["model"],
|
stream = tools is None or len(tools) == 0
|
||||||
messages=ollama_messages,
|
|
||||||
stream=False,
|
chat_params = {
|
||||||
options=Options(**params["options"]),
|
"model": params["model"],
|
||||||
keep_alive=params["keep_alive"],
|
"messages": ollama_messages,
|
||||||
format=params["format"],
|
"stream": stream,
|
||||||
tools=kwargs["tools"],
|
"options": Options(**params["options"]),
|
||||||
)
|
"keep_alive": params["keep_alive"],
|
||||||
|
"format": params["format"],
|
||||||
|
}
|
||||||
|
|
||||||
|
if tools is not None:
|
||||||
|
chat_params["tools"] = tools
|
||||||
|
|
||||||
|
if stream:
|
||||||
|
yield from self._client.chat(**chat_params)
|
||||||
else:
|
else:
|
||||||
yield from self._client.chat(
|
yield self._client.chat(**chat_params)
|
||||||
model=params["model"],
|
|
||||||
messages=ollama_messages,
|
|
||||||
stream=True,
|
|
||||||
options=Options(**params["options"]),
|
|
||||||
keep_alive=params["keep_alive"],
|
|
||||||
format=params["format"],
|
|
||||||
)
|
|
||||||
|
|
||||||
def _chat_stream_with_aggregation(
|
def _chat_stream_with_aggregation(
|
||||||
self,
|
self,
|
||||||
|
Loading…
Reference in New Issue
Block a user