community[patch]: invoke callback prior to yielding token (openai) (#19389)

**Description:** Invoke callback prior to yielding token for BaseOpenAI
& OpenAIChat
**Issue:** [Callback for on_llm_new_token should be invoked before the
token is yielded by the model
#16913](https://github.com/langchain-ai/langchain/issues/16913)
**Dependencies:** None
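
In essence, the fix reorders two statements in the streaming generators: the `on_llm_new_token` callback now fires before the chunk is yielded, so callback handlers observe each token no later than the consumer of the stream does. A minimal sketch of the pattern (the `stream_tokens` and `on_new_token` names are illustrative, not part of the library):

```python
from typing import Callable, Iterator


def stream_tokens(
    tokens: list[str],
    on_new_token: Callable[[str], None],
) -> Iterator[str]:
    """Illustrative generator using the corrected ordering."""
    for token in tokens:
        on_new_token(token)  # notify handlers first ...
        yield token          # ... then hand the token to the caller


seen: list[str] = []
for tok in stream_tokens(["Hello", ",", " world"], seen.append):
    # The handler has already recorded the token by the time the
    # consumer sees it; with the old (yield-first) ordering, a consumer
    # could act on a token before the handler was notified.
    assert seen[-1] == tok
```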
commit 515aab3312 (parent 49e932cd24)
Author: aditya thomas (committed by GitHub)
Date:   2024-03-23 05:15:55 +05:30

@@ -391,7 +391,6 @@ class BaseOpenAI(BaseLLM):
             if not isinstance(stream_resp, dict):
                 stream_resp = stream_resp.dict()
             chunk = _stream_response_to_generation_chunk(stream_resp)
-            yield chunk
             if run_manager:
                 await run_manager.on_llm_new_token(
                     chunk.text,
@@ -401,6 +400,7 @@ class BaseOpenAI(BaseLLM):
                     if chunk.generation_info
                     else None,
                 )
+            yield chunk
 
     def _generate(
         self,
@@ -1113,9 +1113,9 @@ class OpenAIChat(BaseLLM):
                 stream_resp = stream_resp.dict()
             token = stream_resp["choices"][0]["delta"].get("content", "")
             chunk = GenerationChunk(text=token)
-            yield chunk
             if run_manager:
                 run_manager.on_llm_new_token(token, chunk=chunk)
+            yield chunk
 
     async def _astream(
         self,
@@ -1133,9 +1133,9 @@ class OpenAIChat(BaseLLM):
                 stream_resp = stream_resp.dict()
             token = stream_resp["choices"][0]["delta"].get("content", "")
             chunk = GenerationChunk(text=token)
-            yield chunk
             if run_manager:
                 await run_manager.on_llm_new_token(token, chunk=chunk)
+            yield chunk
 
     def _generate(
         self,
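
For reference, a handler that consumes these events might look like the following (a sketch assuming `BaseCallbackHandler` from `langchain_core.callbacks`; the `TokenPrinter` class name is illustrative). After this patch, such a handler's `on_llm_new_token` runs before the corresponding chunk is yielded from `_stream`/`_astream`:

```python
from typing import Any

from langchain_core.callbacks import BaseCallbackHandler


class TokenPrinter(BaseCallbackHandler):
    """Prints tokens as they arrive from the streaming LLM."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        # With the corrected ordering, this runs before the chunk
        # reaches the code iterating over llm.stream(...).
        print(token, end="", flush=True)
```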