From 4856a872616f072ef8db657f695bafa25dd0066a Mon Sep 17 00:00:00 2001
From: aditya thomas <aditya.thomas@alum.mit.edu>
Date: Sat, 23 Mar 2024 01:47:56 +0530
Subject: [PATCH] community[patch]: invoke callback prior to yielding token
 (llama.cpp) (#19392)

**Description:** In the llama.cpp LLM (`LlamaCpp`), invoke the
`on_llm_new_token` callback before yielding each streamed chunk, rather
than after it.
**Issue:** [Callback for on_llm_new_token should be invoked before the
token is yielded by the model
#16913](https://github.com/langchain-ai/langchain/issues/16913)
**Dependencies:** None
---
 libs/community/langchain_community/llms/llamacpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/community/langchain_community/llms/llamacpp.py b/libs/community/langchain_community/llms/llamacpp.py
index 85acfb999e9..b06e6d8cf76 100644
--- a/libs/community/langchain_community/llms/llamacpp.py
+++ b/libs/community/langchain_community/llms/llamacpp.py
@@ -344,11 +344,11 @@ class LlamaCpp(LLM):
                 text=part["choices"][0]["text"],
                 generation_info={"logprobs": logprobs},
             )
-            yield chunk
             if run_manager:
                 run_manager.on_llm_new_token(
                     token=chunk.text, verbose=self.verbose, log_probs=logprobs
                 )
+            yield chunk
 
     def get_num_tokens(self, text: str) -> int:
         tokenized_text = self.client.tokenize(text.encode("utf-8"))