From e7b1a44c5b9852c1c1afd3bf63d410b4dae91358 Mon Sep 17 00:00:00 2001
From: balloonio
Date: Fri, 12 Apr 2024 15:26:12 -0400
Subject: [PATCH] community[patch]: Invoke callback prior to yielding token fix
 for Llamafile (#20365)

- [x] **PR title**: community[patch]: Invoke callback prior to yielding token
  fix for Llamafile
- [x] **PR message**:
    - **Description:** Invoke callback prior to yielding token in the stream
      method in community llamafile.py
    - **Issue:** https://github.com/langchain-ai/langchain/issues/16913
    - **Dependencies:** None
    - **Twitter handle:** @bolun_zhang

If no one reviews your PR within a few days, please @-mention one of baskaryan,
efriis, eyurtsev, hwchase17.
---
 libs/community/langchain_community/llms/llamafile.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libs/community/langchain_community/llms/llamafile.py b/libs/community/langchain_community/llms/llamafile.py
index 5be6f4f2118..1aff521ee33 100644
--- a/libs/community/langchain_community/llms/llamafile.py
+++ b/libs/community/langchain_community/llms/llamafile.py
@@ -297,9 +297,10 @@ class Llamafile(LLM):
         for raw_chunk in response.iter_lines(decode_unicode=True):
             content = self._get_chunk_content(raw_chunk)
             chunk = GenerationChunk(text=content)
-            yield chunk
+
             if run_manager:
                 run_manager.on_llm_new_token(token=chunk.text)
+            yield chunk
 
     def _get_chunk_content(self, chunk: str) -> str:
         """When streaming is turned on, llamafile server returns lines like:
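
For context, a minimal, self-contained sketch of the ordering this patch enforces: the callback fires before the chunk is yielded, so each handler sees the token before downstream code consumes the yielded chunk. This sketch is illustrative only and not part of the patch; `DemoCallbackManager` and `stream_tokens` are hypothetical stand-ins for LangChain's `CallbackManagerForLLMRun` and `Llamafile._stream`.

```python
from typing import Iterator, List, Optional


class DemoCallbackManager:
    """Hypothetical stand-in for CallbackManagerForLLMRun."""

    def on_llm_new_token(self, token: str) -> None:
        print(f"callback saw token: {token!r}")


def stream_tokens(
    tokens: List[str],
    run_manager: Optional[DemoCallbackManager] = None,
) -> Iterator[str]:
    """Yield tokens one at a time, mirroring the fixed _stream ordering."""
    for token in tokens:
        # Notify the callback manager first, then yield the token,
        # matching the reordering made in this patch.
        if run_manager:
            run_manager.on_llm_new_token(token=token)
        yield token


if __name__ == "__main__":
    for tok in stream_tokens(["Hello", ",", " world"], DemoCallbackManager()):
        print(f"consumer got: {tok!r}")
```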