From e135dc70c360c779a6d5d0ca7ae48e3dc5f51083 Mon Sep 17 00:00:00 2001
From: Robby <45851384+h0rv@users.noreply.github.com>
Date: Mon, 12 Feb 2024 22:22:55 -0500
Subject: [PATCH] community[patch]: Invoke callback prior to yielding token
 (#17348)

**Description:** Invoke callback prior to yielding token in stream method for
Ollama.

**Issue:** [Callback for on_llm_new_token should be invoked before the token
is yielded by the model
#16913](https://github.com/langchain-ai/langchain/issues/16913)

Co-authored-by: Robby
---
 libs/community/langchain_community/chat_models/ollama.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libs/community/langchain_community/chat_models/ollama.py b/libs/community/langchain_community/chat_models/ollama.py
index 92b5afb52ba..73a194a90f9 100644
--- a/libs/community/langchain_community/chat_models/ollama.py
+++ b/libs/community/langchain_community/chat_models/ollama.py
@@ -313,12 +313,12 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
             for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
                 if stream_resp:
                     chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
-                    yield chunk
                     if run_manager:
                         run_manager.on_llm_new_token(
                             chunk.text,
                             verbose=self.verbose,
                         )
+                    yield chunk
         except OllamaEndpointNotFoundError:
             yield from self._legacy_stream(messages, stop, **kwargs)
 
@@ -332,12 +332,12 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if stream_resp:
                 chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
-                yield chunk
                 if run_manager:
                     await run_manager.on_llm_new_token(
                         chunk.text,
                         verbose=self.verbose,
                     )
+                yield chunk
 
     @deprecated("0.0.3", alternative="_stream")
     def _legacy_stream(
@@ -351,9 +351,9 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if stream_resp:
                 chunk = _stream_response_to_chat_generation_chunk(stream_resp)
-                yield chunk
                 if run_manager:
                     run_manager.on_llm_new_token(
                         chunk.text,
                         verbose=self.verbose,
                     )
+                yield chunk