From 78ad7a850302e67454e2572fe2ba295a1b236b54 Mon Sep 17 00:00:00 2001
From: balloonio
Date: Thu, 18 Apr 2024 10:25:20 -0400
Subject: [PATCH] community[patch]: Invoke callback prior to yielding token
 fix [HuggingFaceTextGenInference] (#20426)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…gFaceTextGenInference)

- [x] **PR title**: community[patch]: Invoke callback prior to yielding
  token fix for [HuggingFaceTextGenInference]

- [x] **PR message**:
    - **Description:** Invoke callback prior to yielding token in stream
      method in [HuggingFaceTextGenInference]
    - **Issue:** https://github.com/langchain-ai/langchain/issues/16913
    - **Dependencies:** None
    - **Twitter handle:** @bolun_zhang

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, hwchase17.

---------

Co-authored-by: Chester Curme
---
 .../llms/huggingface_text_gen_inference.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/llms/huggingface_text_gen_inference.py b/libs/community/langchain_community/llms/huggingface_text_gen_inference.py
index 9f56a949c6b..e053d172789 100644
--- a/libs/community/langchain_community/llms/huggingface_text_gen_inference.py
+++ b/libs/community/langchain_community/llms/huggingface_text_gen_inference.py
@@ -259,9 +259,10 @@ class HuggingFaceTextGenInference(LLM):
             # yield text, if any
             if text:
                 chunk = GenerationChunk(text=text)
-                yield chunk
+
                 if run_manager:
                     run_manager.on_llm_new_token(chunk.text)
+                yield chunk
 
             # break if stop sequence found
             if stop_seq_found:
@@ -295,9 +296,10 @@ class HuggingFaceTextGenInference(LLM):
             # yield text, if any
             if text:
                 chunk = GenerationChunk(text=text)
-                yield chunk
+
                 if run_manager:
                     await run_manager.on_llm_new_token(chunk.text)
+                yield chunk
 
             # break if stop sequence found
             if stop_seq_found:
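
For context, here is a minimal, self-contained sketch of the pattern this patch applies: invoke the streaming callback *before* yielding the chunk, so the callback fires for every token the consumer receives even if iteration stops early. The `stream_tokens` helper and its `on_new_token` parameter below are hypothetical stand-ins for illustration, not part of the `langchain_community` API.

```python
# Minimal sketch of the callback-before-yield pattern (illustrative only;
# stream_tokens and on_new_token are hypothetical, not langchain_community API).
from typing import Callable, Iterator, Optional


def stream_tokens(
    tokens: Iterator[str],
    on_new_token: Optional[Callable[[str], None]] = None,
) -> Iterator[str]:
    """Yield tokens, notifying the callback before each yield.

    If the callback ran after the yield, a consumer that stops iterating
    early could receive a token whose callback never fires.
    """
    for token in tokens:
        if on_new_token is not None:
            on_new_token(token)  # callback observes the token first
        yield token              # then the consumer receives it


if __name__ == "__main__":
    seen = []
    for tok in stream_tokens(iter(["Hello", ",", " world"]), seen.append):
        print(tok, end="")
    print()
    assert seen == ["Hello", ",", " world"]
```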