huggingface[patch]: Support for HuggingFacePipeline in ChatHuggingFace. (#22194)

- **Description:** Added support for using HuggingFacePipeline in
ChatHuggingFace (previously it was only usable with API endpoints,
probably by oversight).
- **Issue:** #19997 
- **Dependencies:** none
- **Twitter handle:** none

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
Michal Gregor
2024-06-04 02:47:35 +02:00
committed by GitHub
parent 0061ded002
commit 98b2e7b195
2 changed files with 69 additions and 3 deletions

View File

@@ -58,6 +58,62 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `HuggingFacePipeline`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_huggingface import HuggingFacePipeline\n",
"\n",
"llm = HuggingFacePipeline.from_model_id(\n",
" model_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
" task=\"text-generation\",\n",
" pipeline_kwargs=dict(\n",
" max_new_tokens=512,\n",
" do_sample=False,\n",
" repetition_penalty=1.03,\n",
" ),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To run a quantized version, you might specify a `bitsandbytes` quantization config as follows:\n",
"\n",
"```python\n",
"from transformers import BitsAndBytesConfig\n",
"\n",
"quantization_config = BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_quant_type=\"nf4\",\n",
" bnb_4bit_compute_dtype=\"float16\",\n",
" bnb_4bit_use_double_quant=True\n",
")\n",
"```\n",
"\n",
"and pass it to the `HuggingFacePipeline` as a part of its `model_kwargs`:\n",
"\n",
"```python\n",
"pipeline = HuggingFacePipeline(\n",
" ...\n",
"\n",
" model_kwargs={\"quantization_config\": quantization_config},\n",
" \n",
" ...\n",
")\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},

View File

@@ -35,6 +35,7 @@ from langchain_core.tools import BaseTool
from langchain_core.utils.function_calling import convert_to_openai_tool
from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain_huggingface.llms.huggingface_pipeline import HuggingFacePipeline
DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful, and honest assistant."""
@@ -135,6 +136,10 @@ def _is_huggingface_endpoint(llm: Any) -> bool:
return isinstance(llm, HuggingFaceEndpoint)
def _is_huggingface_pipeline(llm: Any) -> bool:
    """Return True if ``llm`` is a ``HuggingFacePipeline`` instance.

    Used by ``ChatHuggingFace`` to decide how to resolve the model id and
    how to dispatch generation calls for locally-run pipelines.
    """
    return isinstance(llm, HuggingFacePipeline)
class ChatHuggingFace(BaseChatModel):
"""
Wrapper for using Hugging Face LLMs as ChatModels.
@@ -150,8 +155,8 @@ class ChatHuggingFace(BaseChatModel):
"""
llm: Any
"""LLM, must be of type HuggingFaceTextGenInference, HuggingFaceEndpoint, or
HuggingFaceHub."""
"""LLM, must be of type HuggingFaceTextGenInference, HuggingFaceEndpoint,
HuggingFaceHub, or HuggingFacePipeline."""
system_message: SystemMessage = SystemMessage(content=DEFAULT_SYSTEM_PROMPT)
tokenizer: Any = None
model_id: Optional[str] = None
@@ -175,10 +180,12 @@ class ChatHuggingFace(BaseChatModel):
not _is_huggingface_hub(values["llm"])
and not _is_huggingface_textgen_inference(values["llm"])
and not _is_huggingface_endpoint(values["llm"])
and not _is_huggingface_pipeline(values["llm"])
):
raise TypeError(
"Expected llm to be one of HuggingFaceTextGenInference, "
f"HuggingFaceEndpoint, HuggingFaceHub, received {type(values['llm'])}"
"HuggingFaceEndpoint, HuggingFaceHub, or HuggingFacePipeline, "
f"received {type(values['llm'])}"
)
return values
@@ -293,6 +300,9 @@ class ChatHuggingFace(BaseChatModel):
return
elif _is_huggingface_textgen_inference(self.llm):
endpoint_url: Optional[str] = self.llm.inference_server_url
elif _is_huggingface_pipeline(self.llm):
self.model_id = self.llm.model_id
return
else:
endpoint_url = self.llm.endpoint_url