diff --git a/docs/docs/integrations/chat/huggingface.ipynb b/docs/docs/integrations/chat/huggingface.ipynb
index 6bdb04870ef..f203d29f685 100644
--- a/docs/docs/integrations/chat/huggingface.ipynb
+++ b/docs/docs/integrations/chat/huggingface.ipynb
@@ -58,6 +58,62 @@
     ")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### `HuggingFacePipeline`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_huggingface import HuggingFacePipeline\n",
+    "\n",
+    "llm = HuggingFacePipeline.from_model_id(\n",
+    "    model_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
+    "    task=\"text-generation\",\n",
+    "    pipeline_kwargs=dict(\n",
+    "        max_new_tokens=512,\n",
+    "        do_sample=False,\n",
+    "        repetition_penalty=1.03,\n",
+    "    ),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To run a quantized version, you might specify a `bitsandbytes` quantization config as follows:\n",
+    "\n",
+    "```python\n",
+    "from transformers import BitsAndBytesConfig\n",
+    "\n",
+    "quantization_config = BitsAndBytesConfig(\n",
+    "    load_in_4bit=True,\n",
+    "    bnb_4bit_quant_type=\"nf4\",\n",
+    "    bnb_4bit_compute_dtype=\"float16\",\n",
+    "    bnb_4bit_use_double_quant=True\n",
+    ")\n",
+    "```\n",
+    "\n",
+    "and pass it to the `HuggingFacePipeline` as part of its `model_kwargs`:\n",
+    "\n",
+    "```python\n",
+    "pipeline = HuggingFacePipeline(\n",
+    "    ...\n",
+    "\n",
+    "    model_kwargs={\"quantization_config\": quantization_config},\n",
+    "\n",
+    "    ...\n",
+    ")\n",
+    "```"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py b/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py
index 8cbb274477e..9b3564d5302 100644
--- a/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py
+++ b/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py
@@ -35,6 +35,7 @@
 from langchain_core.tools import BaseTool
 from langchain_core.utils.function_calling import convert_to_openai_tool
 
 from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint
+from langchain_huggingface.llms.huggingface_pipeline import HuggingFacePipeline
 
 DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful, and honest assistant."""
@@ -135,6 +136,10 @@ def _is_huggingface_endpoint(llm: Any) -> bool:
     return isinstance(llm, HuggingFaceEndpoint)
 
 
+def _is_huggingface_pipeline(llm: Any) -> bool:
+    return isinstance(llm, HuggingFacePipeline)
+
+
 class ChatHuggingFace(BaseChatModel):
     """
     Wrapper for using Hugging Face LLM's as ChatModels.
@@ -150,8 +155,8 @@ class ChatHuggingFace(BaseChatModel):
     """
 
     llm: Any
-    """LLM, must be of type HuggingFaceTextGenInference, HuggingFaceEndpoint, or
-    HuggingFaceHub."""
+    """LLM, must be of type HuggingFaceTextGenInference, HuggingFaceEndpoint,
+    HuggingFaceHub, or HuggingFacePipeline."""
     system_message: SystemMessage = SystemMessage(content=DEFAULT_SYSTEM_PROMPT)
     tokenizer: Any = None
     model_id: Optional[str] = None
@@ -175,10 +180,12 @@ class ChatHuggingFace(BaseChatModel):
             not _is_huggingface_hub(values["llm"])
             and not _is_huggingface_textgen_inference(values["llm"])
             and not _is_huggingface_endpoint(values["llm"])
+            and not _is_huggingface_pipeline(values["llm"])
         ):
             raise TypeError(
                 "Expected llm to be one of HuggingFaceTextGenInference, "
-                f"HuggingFaceEndpoint, HuggingFaceHub, received {type(values['llm'])}"
+                "HuggingFaceEndpoint, HuggingFaceHub, or HuggingFacePipeline, "
+                f"received {type(values['llm'])}"
             )
         return values
 
@@ -293,6 +300,9 @@ class ChatHuggingFace(BaseChatModel):
             return
         elif _is_huggingface_textgen_inference(self.llm):
             endpoint_url: Optional[str] = self.llm.inference_server_url
+        elif _is_huggingface_pipeline(self.llm):
+            self.model_id = self.llm.model_id
+            return
         else:
             endpoint_url = self.llm.endpoint_url
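The notebook's second markdown snippet intentionally leaves the quantized call elided with `...`. A sketch of what a complete call could look like is below; it assumes a CUDA-capable device and the `bitsandbytes` package, and the argument values are purely illustrative, reusing the unquantized example's settings rather than anything prescribed by the notebook:

```python
from transformers import BitsAndBytesConfig

from langchain_huggingface import HuggingFacePipeline

# 4-bit NF4 quantization config, matching the notebook cell above.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

# Illustrative: model_kwargs is forwarded to the underlying from_pretrained
# call, which is where the quantization config takes effect.
llm = HuggingFacePipeline.from_model_id(
    model_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    pipeline_kwargs=dict(max_new_tokens=512, do_sample=False, repetition_penalty=1.03),
    model_kwargs={"quantization_config": quantization_config},
)
```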
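On the chat-model side, the new `_is_huggingface_pipeline` check and the extra branch in `_resolve_model_id` allow a local `HuggingFacePipeline` to be passed directly to `ChatHuggingFace`. A minimal sketch of that usage, assuming a build that includes this change (the prompt is just an example):

```python
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

# Local text-generation pipeline; with this change, model_id is resolved
# from the pipeline itself rather than from an endpoint URL.
llm = HuggingFacePipeline.from_model_id(
    model_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    pipeline_kwargs=dict(max_new_tokens=512, do_sample=False),
)

# Previously this raised TypeError; the pipeline now passes validation and
# the wrapper applies the model's chat template before generating.
chat_model = ChatHuggingFace(llm=llm)
ai_msg = chat_model.invoke("Which planet in the solar system has the most moons?")
print(ai_msg.content)
```

One design note visible in the diff: the new branch in `_resolve_model_id` returns early with the pipeline's own `model_id`, so no endpoint URL needs to be resolved for local pipelines.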