huggingface: init package (#21097)

First PR for the `langchain_huggingface` partner package - Moved some of the Hugging Face-related classes from `community` to the new partner package. Still needed: - Documentation - Tests - Support for the new `apply_chat_template` in `ChatHuggingFace` - Confirm the choice of classes to support for embeddings with the sentence-transformers team. cc: @efriis --------- Co-authored-by: Cyril Kondratenko <kkn1993@gmail.com> Co-authored-by: Erick Friis <erick@langchain.dev>
2025-09-06 05:25:04 +00:00 · 2024-05-13 22:53:15 +02:00
parent 9fce03e7db
commit afd85b60fc
33 changed files with 5299 additions and 133 deletions
--- a/docs/docs/integrations/chat/huggingface.ipynb
+++ b/docs/docs/integrations/chat/huggingface.ipynb
@@ -9,9 +9,10 @@
    "This notebook shows how to get started using `Hugging Face` LLM's as chat models.\n",
    "\n",
    "In particular, we will:\n",
-    "1. Utilize the [HuggingFaceTextGenInference](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_text_gen_inference.py), [HuggingFaceEndpoint](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_endpoint.py), or [HuggingFaceHub](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_hub.py) integrations to instantiate an `LLM`.\n",
-    "2. Utilize the `ChatHuggingFace` class to enable any of these LLMs to interface with LangChain's [Chat Messages](/docs/concepts#chat-models) abstraction.\n",
-    "3. Demonstrate how to use an open-source LLM to power an `ChatAgent` pipeline\n",
+    "1. Utilize the [HuggingFaceEndpoint](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_endpoint.py) integration to instantiate an `LLM`.\n",
+    "2. Utilize the `ChatHuggingFace` class to enable any of these LLMs to interface with LangChain's [Chat Messages](/docs/concepts/#message-types) abstraction.\n",
+    "3. Explore tool calling with `ChatHuggingFace`.\n",
+    "4. Demonstrate how to use an open-source LLM to power a `ChatAgent` pipeline.\n",
    "\n",
    "\n",
    "> Note: To get started, you'll need to have a [Hugging Face Access Token](https://huggingface.co/docs/hub/security-tokens) saved as an environment variable: `HUGGINGFACEHUB_API_TOKEN`."
@@ -21,15 +22,7 @@
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "%pip install --upgrade --quiet  text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2"
   ]
@@ -38,44 +31,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## 1. Instantiate an LLM\n",
-    "\n",
-    "There are three LLM options to choose from."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### `HuggingFaceTextGenInference`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "from langchain_community.llms import HuggingFaceTextGenInference\n",
-    "\n",
-    "ENDPOINT_URL = \"<YOUR_ENDPOINT_URL_HERE>\"\n",
-    "HF_TOKEN = os.getenv(\"HUGGINGFACEHUB_API_TOKEN\")\n",
-    "\n",
-    "llm = HuggingFaceTextGenInference(\n",
-    "    inference_server_url=ENDPOINT_URL,\n",
-    "    max_new_tokens=512,\n",
-    "    top_k=50,\n",
-    "    temperature=0.1,\n",
-    "    repetition_penalty=1.03,\n",
-    "    server_kwargs={\n",
-    "        \"headers\": {\n",
-    "            \"Authorization\": f\"Bearer {HF_TOKEN}\",\n",
-    "            \"Content-Type\": \"application/json\",\n",
-    "        }\n",
-    "    },\n",
-    ")"
+    "## 1. Instantiate an LLM"
   ]
  },
  {
@@ -87,58 +43,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
-    "from langchain_community.llms import HuggingFaceEndpoint\n",
+    "from langchain_huggingface.llms import HuggingFaceEndpoint\n",
    "\n",
-    "ENDPOINT_URL = \"<YOUR_ENDPOINT_URL_HERE>\"\n",
    "llm = HuggingFaceEndpoint(\n",
-    "    endpoint_url=ENDPOINT_URL,\n",
+    "    repo_id=\"meta-llama/Meta-Llama-3-70B-Instruct\",\n",
    "    task=\"text-generation\",\n",
-    "    model_kwargs={\n",
-    "        \"max_new_tokens\": 512,\n",
-    "        \"top_k\": 50,\n",
-    "        \"temperature\": 0.1,\n",
-    "        \"repetition_penalty\": 1.03,\n",
-    "    },\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### `HuggingFaceHub`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/jacoblee/langchain/langchain/libs/langchain/.venv/lib/python3.10/site-packages/huggingface_hub/utils/_deprecation.py:127: FutureWarning: '__init__' (from 'huggingface_hub.inference_api') is deprecated and will be removed from version '1.0'. `InferenceApi` client is deprecated in favor of the more feature-complete `InferenceClient`. Check out this guide to learn how to convert your script to use it: https://huggingface.co/docs/huggingface_hub/guides/inference#legacy-inferenceapi-client.\n",
-      "  warnings.warn(warning_message, FutureWarning)\n"
-     ]
-    }
-   ],
-   "source": [
-    "from langchain_community.llms import HuggingFaceHub\n",
-    "\n",
-    "llm = HuggingFaceHub(\n",
-    "    repo_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
-    "    task=\"text-generation\",\n",
-    "    model_kwargs={\n",
-    "        \"max_new_tokens\": 512,\n",
-    "        \"top_k\": 30,\n",
-    "        \"temperature\": 0.1,\n",
-    "        \"repetition_penalty\": 1.03,\n",
-    "    },\n",
+    "    max_new_tokens=512,\n",
+    "    do_sample=False,\n",
+    "    repetition_penalty=1.03,\n",
    ")"
   ]
  },
@@ -153,37 +69,30 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Instantiate the chat model and some messages to pass."
+    "Instantiate the chat model and some messages to pass. \n",
+    "\n",
+    "**Note**: You need to pass the `model_id` explicitly if you are using a self-hosted `text-generation-inference` server."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "WARNING! repo_id is not default parameter.\n",
-      "                    repo_id was transferred to model_kwargs.\n",
-      "                    Please confirm that repo_id is what you intended.\n",
-      "WARNING! task is not default parameter.\n",
-      "                    task was transferred to model_kwargs.\n",
-      "                    Please confirm that task is what you intended.\n",
-      "WARNING! huggingfacehub_api_token is not default parameter.\n",
-      "                    huggingfacehub_api_token was transferred to model_kwargs.\n",
-      "                    Please confirm that huggingfacehub_api_token is what you intended.\n",
-      "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n"
+      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
-    "from langchain.schema import (\n",
+    "from langchain_core.messages import (\n",
    "    HumanMessage,\n",
    "    SystemMessage,\n",
    ")\n",
-    "from langchain_community.chat_models.huggingface import ChatHuggingFace\n",
+    "from langchain_huggingface.chat_models import ChatHuggingFace\n",
    "\n",
    "messages = [\n",
    "    SystemMessage(content=\"You're a helpful assistant\"),\n",
@@ -199,21 +108,21 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Inspect which model and corresponding chat template is being used."
+    "Check the `model_id`"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "'HuggingFaceH4/zephyr-7b-beta'"
+       "'meta-llama/Meta-Llama-3-70B-Instruct'"
      ]
     },
-     "execution_count": 6,
+     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -231,16 +140,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "\"<|system|>\\nYou're a helpful assistant</s>\\n<|user|>\\nWhat happens when an unstoppable force meets an immovable object?</s>\\n<|assistant|>\\n\""
+       "\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nYou're a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\nWhat happens when an unstoppable force meets an immovable object?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n\""
      ]
     },
-     "execution_count": 7,
+     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -258,14 +167,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "According to a popular philosophical paradox, when an unstoppable force meets an immovable object, it is impossible to determine which one will prevail because both are defined as being completely unyielding and unmovable. The paradox suggests that the very concepts of \"unstoppable force\" and \"immovable object\" are inherently contradictory, and therefore, it is illogical to imagine a scenario where they would meet and interact. However, in practical terms, it is highly unlikely for such a scenario to occur in the real world, as the concepts of \"unstoppable force\" and \"immovable object\" are often used metaphorically to describe hypothetical situations or abstract concepts, rather than physical objects or forces.\n"
+      "One of the classic thought experiments in physics!\n",
+      "\n",
+      "The concept of an unstoppable force meeting an immovable object is a paradox that has puzzled philosophers and physicists for centuries. It's a mind-bending scenario that challenges our understanding of the fundamental laws of physics.\n",
+      "\n",
+      "In essence, an unstoppable force is something that cannot be halted or slowed down, while an immovable object is something that cannot be moved or displaced. If we assume that both entities exist in the same universe, we run into a logical contradiction.\n",
+      "\n",
+      "Here\n"
     ]
    }
   ],
@@ -278,7 +193,71 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## 3. Take it for a spin as an agent!\n",
+    "## 3. Explore tool calling with `ChatHuggingFace`\n",
+    "\n",
+    "`text-generation-inference` supports tool calling with open-source LLMs starting from v2.0.1."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a basic tool (`Calculator`):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.pydantic_v1 import BaseModel, Field\n",
+    "\n",
+    "\n",
+    "class Calculator(BaseModel):\n",
+    "    \"\"\"Multiply two integers together.\"\"\"\n",
+    "\n",
+    "    a: int = Field(..., description=\"First integer\")\n",
+    "    b: int = Field(..., description=\"Second integer\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Bind the tool to the `chat_model` and give it a try:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[Calculator(a=3, b=12)]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain_core.output_parsers.openai_tools import PydanticToolsParser\n",
+    "\n",
+    "llm_with_multiply = chat_model.bind_tools([Calculator], tool_choice=\"auto\")\n",
+    "parser = PydanticToolsParser(tools=[Calculator])\n",
+    "tool_chain = llm_with_multiply | parser\n",
+    "tool_chain.invoke(\"How much is 3 multiplied by 12?\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Take it for a spin as an agent!\n",
    "\n",
    "Here we'll test out `Zephyr-7B-beta` as a zero-shot `ReAct` Agent. The example below is taken from [here](https://python.langchain.com/v0.1/docs/modules/agents/agent_types/react/#using-chat-models).\n",
    "\n",
@@ -287,7 +266,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -310,7 +289,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -342,7 +321,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
--- a/docs/docs/integrations/llms/huggingface_endpoint.ipynb
+++ b/docs/docs/integrations/llms/huggingface_endpoint.ipynb
@@ -20,7 +20,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from langchain_community.llms import HuggingFaceEndpoint"
+    "from langchain_huggingface.llms import HuggingFaceEndpoint"
   ]
  },
  {
@@ -83,7 +83,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from langchain_community.llms import HuggingFaceEndpoint"
+    "from langchain_huggingface.llms import HuggingFaceEndpoint"
   ]
  },
  {
@@ -193,7 +193,7 @@
   "outputs": [],
   "source": [
    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
-    "from langchain_community.llms import HuggingFaceEndpoint\n",
+    "from langchain_huggingface.llms import HuggingFaceEndpoint\n",
    "\n",
    "llm = HuggingFaceEndpoint(\n",
    "    endpoint_url=f\"{your_endpoint_url}\",\n",
--- a/docs/docs/integrations/llms/huggingface_pipelines.ipynb
+++ b/docs/docs/integrations/llms/huggingface_pipelines.ipynb
@@ -55,7 +55,7 @@
   },
   "outputs": [],
   "source": [
-    "from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n",
+    "from langchain_huggingface.llms import HuggingFacePipeline\n",
    "\n",
    "hf = HuggingFacePipeline.from_model_id(\n",
    "    model_id=\"gpt2\",\n",
@@ -79,7 +79,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n",
+    "from langchain_huggingface.llms import HuggingFacePipeline\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
    "\n",
    "model_id = \"gpt2\"\n",
--- a/docs/docs/integrations/text_embedding/huggingfacehub.ipynb
+++ b/docs/docs/integrations/text_embedding/huggingfacehub.ipynb
@@ -26,7 +26,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from langchain_community.embeddings import HuggingFaceEmbeddings"
+    "from langchain_huggingface.embeddings import HuggingFaceEmbeddings"
   ]
  },
  {
@@ -175,7 +175,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from langchain_community.embeddings import HuggingFaceHubEmbeddings"
+    "from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings"
   ]
  },
  {
@@ -185,7 +185,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "embeddings = HuggingFaceHubEmbeddings()"
+    "embeddings = HuggingFaceEndpointEmbeddings()"
   ]
  },
  {
--- a/docs/docs/integrations/text_embedding/text_embeddings_inference.ipynb
+++ b/docs/docs/integrations/text_embedding/text_embeddings_inference.ipynb
@@ -59,7 +59,7 @@
   },
   "outputs": [],
   "source": [
-    "from langchain_community.embeddings import HuggingFaceHubEmbeddings"
+    "from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings"
   ]
  },
  {
@@ -71,7 +71,7 @@
   },
   "outputs": [],
   "source": [
-    "embeddings = HuggingFaceHubEmbeddings(model=\"http://localhost:8080\")"
+    "embeddings = HuggingFaceEndpointEmbeddings(model=\"http://localhost:8080\")"
   ]
  },
  {