diff --git a/docs/docs/integrations/chat/huggingface.ipynb b/docs/docs/integrations/chat/huggingface.ipynb
index e665776a2b4..801471dc67c 100644
--- a/docs/docs/integrations/chat/huggingface.ipynb
+++ b/docs/docs/integrations/chat/huggingface.ipynb
@@ -120,7 +120,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 10,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {
@@ -138,11 +138,36 @@
    "from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint\n",
    "\n",
    "llm = HuggingFaceEndpoint(\n",
-   "    repo_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
+   "    repo_id=\"deepseek-ai/DeepSeek-R1-0528\",\n",
    "    task=\"text-generation\",\n",
    "    max_new_tokens=512,\n",
    "    do_sample=False,\n",
    "    repetition_penalty=1.03,\n",
+   "    provider=\"auto\",  # let Hugging Face choose the best provider for you\n",
+   ")\n",
+   "\n",
+   "chat_model = ChatHuggingFace(llm=llm)"
+  ]
+ },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "Now let's take advantage of [Inference Providers](https://huggingface.co/docs/inference-providers) to run the model on a specific third-party provider."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "llm = HuggingFaceEndpoint(\n",
+   "    repo_id=\"deepseek-ai/DeepSeek-R1-0528\",\n",
+   "    task=\"text-generation\",\n",
+   "    provider=\"hyperbolic\",  # set your provider here\n",
+   "    # provider=\"nebius\",\n",
+   "    # provider=\"together\",\n",
    ")\n",
    "\n",
    "chat_model = ChatHuggingFace(llm=llm)"
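+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick sanity check (a minimal sketch, assuming a valid `HUGGINGFACEHUB_API_TOKEN` is set in the environment), we can send the chat model a single message:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.messages import HumanMessage\n",
+    "\n",
+    "# Routes the request to whichever provider was selected above\n",
+    "ai_msg = chat_model.invoke([HumanMessage(content=\"What is the capital of France?\")])\n",
+    "print(ai_msg.content)"
+   ]
+  },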
diff --git a/docs/docs/integrations/llms/huggingface_endpoint.ipynb b/docs/docs/integrations/llms/huggingface_endpoint.ipynb
index 933a4d1d524..ed54b27b7a6 100644
--- a/docs/docs/integrations/llms/huggingface_endpoint.ipynb
+++ b/docs/docs/integrations/llms/huggingface_endpoint.ipynb
@@ -117,7 +117,7 @@
   "source": [
    "## Examples\n",
    "\n",
-   "Here is an example of how you can access the `HuggingFaceEndpoint` integration of the free [Serverless Endpoints](https://huggingface.co/inference-endpoints/serverless) API."
+   "Here is an example of how you can access the `HuggingFaceEndpoint` integration via the serverless [Inference Providers](https://huggingface.co/docs/inference-providers) API.\n"
  ]
 },
 {
@@ -128,13 +128,17 @@
  },
  "outputs": [],
  "source": [
-  "repo_id = \"mistralai/Mistral-7B-Instruct-v0.2\"\n",
+  "repo_id = \"deepseek-ai/DeepSeek-R1-0528\"\n",
   "\n",
   "llm = HuggingFaceEndpoint(\n",
   "    repo_id=repo_id,\n",
   "    max_length=128,\n",
   "    temperature=0.5,\n",
   "    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,\n",
+  "    provider=\"auto\",  # set your provider here, see hf.co/settings/inference-providers\n",
+  "    # provider=\"hyperbolic\",\n",
+  "    # provider=\"nebius\",\n",
+  "    # provider=\"together\",\n",
   ")\n",
   "llm_chain = prompt | llm\n",
   "print(llm_chain.invoke({\"question\": question}))"
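+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The endpoint can also be invoked directly, without a prompt template. This is a minimal sketch that reuses the `llm` defined above and assumes your token has access to the selected provider:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Direct call to the endpoint, bypassing the prompt | llm chain above\n",
+    "print(llm.invoke(\"What is Hugging Face?\"))"
+   ]
+  },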
diff --git a/docs/docs/integrations/providers/huggingface.mdx b/docs/docs/integrations/providers/huggingface.mdx
index da7d39c1c7a..b956fc60241 100644
--- a/docs/docs/integrations/providers/huggingface.mdx
+++ b/docs/docs/integrations/providers/huggingface.mdx
@@ -1,6 +1,11 @@
 # Hugging Face

-All functionality related to the [Hugging Face Platform](https://huggingface.co/).
+All functionality related to the [Hugging Face Hub](https://huggingface.co/) and libraries like [transformers](https://huggingface.co/docs/transformers/index), [sentence transformers](https://sbert.net/), and [datasets](https://huggingface.co/docs/datasets/index).
+
+> [Hugging Face](https://huggingface.co/) is an AI platform with all major open source models, datasets, MCPs, and demos.
+> It provides model inference locally and via serverless [Inference Providers](https://huggingface.co/docs/inference-providers).
+>
+> You can use [Inference Providers](https://huggingface.co/docs/inference-providers) to run open source models like DeepSeek R1 on scalable serverless infrastructure.

 ## Installation

@@ -26,6 +31,7 @@ from langchain_huggingface import ChatHuggingFace

 ### HuggingFaceEndpoint

+We can use the `HuggingFaceEndpoint` class to run open source models via serverless [Inference Providers](https://huggingface.co/docs/inference-providers) or via dedicated [Inference Endpoints](https://huggingface.co/inference-endpoints/dedicated).

 See a [usage example](/docs/integrations/llms/huggingface_endpoint).

@@ -35,7 +41,7 @@ from langchain_huggingface import HuggingFaceEndpoint

 ### HuggingFacePipeline

-Hugging Face models can be run locally through the `HuggingFacePipeline` class.
+We can use the `HuggingFacePipeline` class to run open source models locally.

 See a [usage example](/docs/integrations/llms/huggingface_pipelines).

@@ -47,6 +53,8 @@ from langchain_huggingface import HuggingFacePipeline

 ### HuggingFaceEmbeddings

+We can use the `HuggingFaceEmbeddings` class to run open source embedding models locally.
+
 See a [usage example](/docs/integrations/text_embedding/huggingfacehub).

 ```python
@@ -55,6 +63,8 @@ from langchain_huggingface import HuggingFaceEmbeddings

 ### HuggingFaceEndpointEmbeddings

+We can use the `HuggingFaceEndpointEmbeddings` class to run open source embedding models via a dedicated [Inference Endpoint](https://huggingface.co/inference-endpoints/dedicated).
+
 See a [usage example](/docs/integrations/text_embedding/huggingfacehub).

 ```python
@@ -63,6 +73,8 @@ from langchain_huggingface import HuggingFaceEndpointEmbeddings

 ### HuggingFaceInferenceAPIEmbeddings

+We can use the `HuggingFaceInferenceAPIEmbeddings` class to run open source embedding models via [Inference Providers](https://huggingface.co/docs/inference-providers).
+
 See a [usage example](/docs/integrations/text_embedding/huggingfacehub).

 ```python
@@ -71,6 +83,8 @@ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

 ### HuggingFaceInstructEmbeddings

+We can use the `HuggingFaceInstructEmbeddings` class to run open source embedding models locally.
+
 See a [usage example](/docs/integrations/text_embedding/instruct_embeddings).

 ```python
diff --git a/docs/docs/integrations/text_embedding/huggingfacehub.ipynb b/docs/docs/integrations/text_embedding/huggingfacehub.ipynb
index 568d5686012..4b8db414c19 100644
--- a/docs/docs/integrations/text_embedding/huggingfacehub.ipynb
+++ b/docs/docs/integrations/text_embedding/huggingfacehub.ipynb
@@ -95,35 +95,36 @@
   "id": "92019ef1-5d30-4985-b4e6-c0d98bdfe265",
   "metadata": {},
   "source": [
-   "## Hugging Face Inference API\n",
-   "We can also access embedding models via the Hugging Face Inference API, which does not require us to install ``sentence_transformers`` and download models locally."
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 1,
-  "id": "66f5c6ba-1446-43e1-b012-800d17cef300",
-  "metadata": {},
-  "outputs": [
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     "Enter your HF Inference API Key:\n",
-     "\n",
-     " ········\n"
-    ]
-   }
-  ],
-  "source": [
-   "import getpass\n",
+   "## Hugging Face Inference Providers\n",
    "\n",
-   "inference_api_key = getpass.getpass(\"Enter your HF Inference API Key:\\n\\n\")"
+   "We can also access embedding models via [Inference Providers](https://huggingface.co/docs/inference-providers), which lets us use open source models on scalable serverless infrastructure.\n",
+   "\n",
+   "First, we need to get a read-only API key from [Hugging Face](https://huggingface.co/settings/tokens).\n"
   ]
  },
 {
  "cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
+ "id": "c5576a6c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "from getpass import getpass\n",
+  "\n",
+  "huggingfacehub_api_token = getpass()"
+ ]
+},
+{
+ "cell_type": "markdown",
+ "id": "3ad10337",
+ "metadata": {},
+ "source": [
+  "Now we can use the `HuggingFaceInferenceAPIEmbeddings` class to run open source embedding models via [Inference Providers](https://huggingface.co/docs/inference-providers)."
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
  "id": "d0623c1f-cd82-4862-9bce-3655cb9b66ac",
  "metadata": {},
  "outputs": [
@@ -139,10 +140,11 @@
   }
  ],
  "source": [
-  "from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings\n",
+  "from langchain_huggingface import HuggingFaceInferenceAPIEmbeddings\n",
   "\n",
   "embeddings = HuggingFaceInferenceAPIEmbeddings(\n",
-  "    api_key=inference_api_key, model_name=\"sentence-transformers/all-MiniLM-l6-v2\"\n",
+  "    api_key=huggingfacehub_api_token,\n",
+  "    model_name=\"sentence-transformers/all-MiniLM-l6-v2\",\n",
   ")\n",
   "\n",
   "query_result = embeddings.embed_query(text)\n",