diff --git a/libs/community/langchain_community/llms/llamafile.py b/libs/community/langchain_community/llms/llamafile.py index 5be6f4f2118..1aff521ee33 100644 --- a/libs/community/langchain_community/llms/llamafile.py +++ b/libs/community/langchain_community/llms/llamafile.py @@ -297,9 +297,10 @@ class Llamafile(LLM): for raw_chunk in response.iter_lines(decode_unicode=True): content = self._get_chunk_content(raw_chunk) chunk = GenerationChunk(text=content) - yield chunk + if run_manager: run_manager.on_llm_new_token(token=chunk.text) + yield chunk def _get_chunk_content(self, chunk: str) -> str: """When streaming is turned on, llamafile server returns lines like: