community[patch]: callback before yield for _stream/_astream (#17907)

- Description: invoke the on_llm_new_token callback before yielding the
chunk in _stream/_astream for some chat models, so that all chat models
behave consistently.
- Issue: N/A
- Dependencies: N/A
This commit is contained in:
mackong
2024-02-23 08:15:21 +08:00
committed by GitHub
parent 15e42f1799
commit 9678797625
22 changed files with 66 additions and 55 deletions

View File

@@ -219,10 +219,12 @@ class ChatFireworks(BaseChatModel):
dict(finish_reason=finish_reason) if finish_reason is not None else None
)
default_chunk_class = chunk.__class__
chunk = ChatGenerationChunk(message=chunk, generation_info=generation_info)
yield chunk
cg_chunk = ChatGenerationChunk(
message=chunk, generation_info=generation_info
)
if run_manager:
run_manager.on_llm_new_token(chunk.text, chunk=chunk)
run_manager.on_llm_new_token(chunk.text, chunk=cg_chunk)
yield cg_chunk
async def _astream(
self,
@@ -250,10 +252,12 @@ class ChatFireworks(BaseChatModel):
dict(finish_reason=finish_reason) if finish_reason is not None else None
)
default_chunk_class = chunk.__class__
chunk = ChatGenerationChunk(message=chunk, generation_info=generation_info)
yield chunk
cg_chunk = ChatGenerationChunk(
message=chunk, generation_info=generation_info
)
if run_manager:
await run_manager.on_llm_new_token(token=chunk.text, chunk=chunk)
await run_manager.on_llm_new_token(token=chunk.text, chunk=cg_chunk)
yield cg_chunk
def conditional_decorator(