community[patch]: Add streaming logic in ChatHuggingFace (#18784)

- Add functions (_stream, _astream) - Connect to _generate and _agenerate Thank you for contributing to LangChain! - [x] **PR title**: "community: Add streaming logic in ChatHuggingFace" - [x] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** Addition functions (_stream, _astream) and connection to _generate and _agenerate - **Issue:** #18782 - **Dependencies:** none - **Twitter handle:** @lunara_x
2025-08-12 06:13:36 +00:00 · 2024-04-17 11:17:03 +09:00 · 2024-04-17 11:17:03 +09:00 · b34f1086fe
commit b34f1086fe
parent c05c379b26
1 changed files with 59 additions and 5 deletions
--- a/libs/community/langchain_community/chat_models/huggingface.py
+++ b/libs/community/langchain_community/chat_models/huggingface.py
@ -1,19 +1,28 @@
 """Hugging Face Chat Wrapper."""
-
+from typing import Any, AsyncIterator, Iterator, List, Optional
 from typing import Any, List, Optional
 from langchain_core.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
 )
-from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.language_models.chat_models import (
    BaseChatModel,
    agenerate_from_stream,
    generate_from_stream,
 )
 from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    HumanMessage,
    SystemMessage,
 )
-from langchain_core.outputs import ChatGeneration, ChatResult, LLMResult
+from langchain_core.outputs import (
    ChatGeneration,
    ChatGenerationChunk,
    ChatResult,
    LLMResult,
 )
 from langchain_core.pydantic_v1 import root_validator
 from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
@ -26,7 +35,8 @@ DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful, and honest assistant."
 class ChatHuggingFace(BaseChatModel):
-    """Hugging Face LLMs as ChatModels.
+    """
    Wrapper for using Hugging Face LLM's as ChatModels.
    Works with `HuggingFaceTextGenInference`, `HuggingFaceEndpoint`,
    and `HuggingFaceHub` LLMs.
@ -44,6 +54,7 @@ class ChatHuggingFace(BaseChatModel):
    system_message: SystemMessage = SystemMessage(content=DEFAULT_SYSTEM_PROMPT)
    tokenizer: Any = None
    model_id: Optional[str] = None
    streaming: bool = False
    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)
@ -70,6 +81,37 @@ class ChatHuggingFace(BaseChatModel):
            )
        return values
    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        request = self._to_chat_prompt(messages)
        for data in self.llm.stream(request, **kwargs):
            delta = data
            chunk = ChatGenerationChunk(message=AIMessageChunk(content=delta))
            if run_manager:
                run_manager.on_llm_new_token(delta, chunk=chunk)
            yield chunk
    async def _astream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        request = self._to_chat_prompt(messages)
        async for data in self.llm.astream(request, **kwargs):
            delta = data
            chunk = ChatGenerationChunk(message=AIMessageChunk(content=delta))
            if run_manager:
                await run_manager.on_llm_new_token(delta, chunk=chunk)
            yield chunk
    def _generate(
        self,
        messages: List[BaseMessage],
@ -77,6 +119,12 @@ class ChatHuggingFace(BaseChatModel):
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        if self.streaming:
            stream_iter = self._stream(
                messages, stop=stop, run_manager=run_manager, **kwargs
            )
            return generate_from_stream(stream_iter)
        llm_input = self._to_chat_prompt(messages)
        llm_result = self.llm._generate(
            prompts=[llm_input], stop=stop, run_manager=run_manager, **kwargs
@ -90,6 +138,12 @@ class ChatHuggingFace(BaseChatModel):
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        if self.streaming:
            stream_iter = self._astream(
                messages, stop=stop, run_manager=run_manager, **kwargs
            )
            return await agenerate_from_stream(stream_iter)
        llm_input = self._to_chat_prompt(messages)
        llm_result = await self.llm._agenerate(
            prompts=[llm_input], stop=stop, run_manager=run_manager, **kwargs