From 515aab3312be1b0859ba5f7ac947a7ea1d741db8 Mon Sep 17 00:00:00 2001
From: aditya thomas
Date: Sat, 23 Mar 2024 05:15:55 +0530
Subject: [PATCH] community[patch]: invoke callback prior to yielding token
 (openai) (#19389)

**Description:** Invoke callback prior to yielding token for BaseOpenAI & OpenAIChat
**Issue:** [Callback for on_llm_new_token should be invoked before the token is yielded by the model #16913](https://github.com/langchain-ai/langchain/issues/16913)
**Dependencies:** None
---
 libs/community/langchain_community/llms/openai.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libs/community/langchain_community/llms/openai.py b/libs/community/langchain_community/llms/openai.py
index ce35db32e34..56ca15dda74 100644
--- a/libs/community/langchain_community/llms/openai.py
+++ b/libs/community/langchain_community/llms/openai.py
@@ -391,7 +391,6 @@ class BaseOpenAI(BaseLLM):
             if not isinstance(stream_resp, dict):
                 stream_resp = stream_resp.dict()
             chunk = _stream_response_to_generation_chunk(stream_resp)
-            yield chunk
             if run_manager:
                 await run_manager.on_llm_new_token(
                     chunk.text,
@@ -401,6 +400,7 @@
                     if chunk.generation_info
                     else None,
                 )
+            yield chunk
 
     def _generate(
         self,
@@ -1113,9 +1113,9 @@ class OpenAIChat(BaseLLM):
                 stream_resp = stream_resp.dict()
             token = stream_resp["choices"][0]["delta"].get("content", "")
             chunk = GenerationChunk(text=token)
-            yield chunk
             if run_manager:
                 run_manager.on_llm_new_token(token, chunk=chunk)
+            yield chunk
 
     async def _astream(
         self,
@@ -1133,9 +1133,9 @@ class OpenAIChat(BaseLLM):
                 stream_resp = stream_resp.dict()
             token = stream_resp["choices"][0]["delta"].get("content", "")
             chunk = GenerationChunk(text=token)
-            yield chunk
             if run_manager:
                 await run_manager.on_llm_new_token(token, chunk=chunk)
+            yield chunk
 
     def _generate(
         self,
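
The ordering matters because a generator suspends at each `yield`: if the consumer breaks out of the loop (or raises) after receiving a token, the generator is closed and any code after the `yield` never runs, so a callback placed after the `yield` would silently be skipped for the last consumed token. The sketch below illustrates the pattern this patch enforces; it is a minimal stand-in with invented names (`stream_tokens`, `on_new_token`), not LangChain's actual streaming code.

```python
# Minimal sketch of "callback before yield" (illustrative names, not LangChain's API).
import asyncio
from typing import AsyncIterator, Awaitable, Callable, List


async def stream_tokens(
    tokens: List[str],
    on_new_token: Callable[[str], Awaitable[None]],
) -> AsyncIterator[str]:
    for token in tokens:
        # Callback first: by the time the consumer sees the token, every
        # handler has already observed it. If the consumer stops iterating
        # early (break, exception), the handler still fired for each token
        # that was actually delivered.
        await on_new_token(token)
        yield token


async def main() -> None:
    seen: List[str] = []

    async def handler(token: str) -> None:
        seen.append(token)

    async for token in stream_tokens(["he", "llo"], handler):
        # The handler has always run before this point for `token`.
        assert token in seen
    print(seen)  # ['he', 'llo']


asyncio.run(main())
```

With the reversed ordering (`yield` first), swapping the two statements in `stream_tokens` and adding a `break` inside `main`'s loop would close the generator before `on_new_token` ran for the delivered token, which is the behavior reported in #16913.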