docs: huggingface package (#21645)

This commit is contained in:
Erick Friis 2024-05-13 20:17:40 -07:00 committed by GitHub
parent cd1879f5e7
commit 2a984e8e3f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 50 additions and 87 deletions

View File

@ -647,7 +647,7 @@ Sometimes you may not have the luxury of using OpenAI or other service-hosted la
import logging import logging
import torch import torch
from transformers import AutoTokenizer, GPT2TokenizerFast, pipeline, AutoModelForSeq2SeqLM, AutoModelForCausalLM from transformers import AutoTokenizer, GPT2TokenizerFast, pipeline, AutoModelForSeq2SeqLM, AutoModelForCausalLM
from langchain_community.llms import HuggingFacePipeline from langchain_huggingface import HuggingFacePipeline
# Note: This model requires a large GPU, e.g. an 80GB A100. See documentation for other ways to run private non-OpenAI models. # Note: This model requires a large GPU, e.g. an 80GB A100. See documentation for other ways to run private non-OpenAI models.
model_id = "google/flan-ul2" model_id = "google/flan-ul2"
@ -992,7 +992,7 @@ Now that you have some examples (with manually corrected output SQL), you can do
```python ```python
from langchain.prompts import FewShotPromptTemplate, PromptTemplate from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chains.sql_database.prompt import _sqlite_prompt, PROMPT_SUFFIX from langchain.chains.sql_database.prompt import _sqlite_prompt, PROMPT_SUFFIX
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts.example_selector.semantic_similarity import SemanticSimilarityExampleSelector from langchain.prompts.example_selector.semantic_similarity import SemanticSimilarityExampleSelector
from langchain_community.vectorstores import Chroma from langchain_community.vectorstores import Chroma

View File

@ -21,7 +21,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%pip install --upgrade --quiet sentence-transformers langchain-chroma langchain langchain-openai > /dev/null" "%pip install --upgrade --quiet sentence-transformers langchain-chroma langchain langchain-openai langchain-huggingface > /dev/null"
] ]
}, },
{ {
@ -57,7 +57,7 @@
"from langchain_community.document_transformers import (\n", "from langchain_community.document_transformers import (\n",
" LongContextReorder,\n", " LongContextReorder,\n",
")\n", ")\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n", "from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_openai import OpenAI\n", "from langchain_openai import OpenAI\n",
"\n", "\n",
"# Get embeddings.\n", "# Get embeddings.\n",

View File

@ -24,7 +24,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%pip install --upgrade --quiet text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2" "%pip install --upgrade --quiet langchain-huggingface text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2"
] ]
}, },
{ {
@ -47,7 +47,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_huggingface.llms import HuggingFaceEndpoint\n", "from langchain_huggingface import HuggingFaceEndpoint\n",
"\n", "\n",
"llm = HuggingFaceEndpoint(\n", "llm = HuggingFaceEndpoint(\n",
" repo_id=\"meta-llama/Meta-Llama-3-70B-Instruct\",\n", " repo_id=\"meta-llama/Meta-Llama-3-70B-Instruct\",\n",
@ -92,7 +92,7 @@
" HumanMessage,\n", " HumanMessage,\n",
" SystemMessage,\n", " SystemMessage,\n",
")\n", ")\n",
"from langchain_huggingface.chat_models import ChatHuggingFace\n", "from langchain_huggingface import ChatHuggingFace\n",
"\n", "\n",
"messages = [\n", "messages = [\n",
" SystemMessage(content=\"You're a helpful assistant\"),\n", " SystemMessage(content=\"You're a helpful assistant\"),\n",

View File

@ -67,8 +67,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n", "documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n",

View File

@ -20,7 +20,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_huggingface.llms import HuggingFaceEndpoint" "from langchain_huggingface import HuggingFaceEndpoint"
] ]
}, },
{ {
@ -83,7 +83,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_huggingface.llms import HuggingFaceEndpoint" "from langchain_huggingface import HuggingFaceEndpoint"
] ]
}, },
{ {
@ -193,7 +193,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain_huggingface.llms import HuggingFaceEndpoint\n", "from langchain_huggingface import HuggingFaceEndpoint\n",
"\n", "\n",
"llm = HuggingFaceEndpoint(\n", "llm = HuggingFaceEndpoint(\n",
" endpoint_url=f\"{your_endpoint_url}\",\n", " endpoint_url=f\"{your_endpoint_url}\",\n",

View File

@ -152,7 +152,7 @@
} }
], ],
"source": [ "source": [
"from langchain_community.llms import HuggingFacePipeline\n", "from langchain_huggingface import HuggingFacePipeline\n",
"from transformers import pipeline\n", "from transformers import pipeline\n",
"\n", "\n",
"hf_model = pipeline(\n", "hf_model = pipeline(\n",

View File

@ -25,7 +25,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"%pip install --upgrade --quiet lm-format-enforcer > /dev/null" "%pip install --upgrade --quiet lm-format-enforcer langchain-huggingface > /dev/null"
] ]
}, },
{ {
@ -193,7 +193,7 @@
} }
], ],
"source": [ "source": [
"from langchain_community.llms import HuggingFacePipeline\n", "from langchain_huggingface import HuggingFacePipeline\n",
"from transformers import pipeline\n", "from transformers import pipeline\n",
"\n", "\n",
"hf_model = pipeline(\n", "hf_model = pipeline(\n",

View File

@ -78,7 +78,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n",
"from mlx_lm import load\n", "from mlx_lm import load\n",
"\n", "\n",
"model, tokenizer = load(\"mlx-community/quantized-gemma-2b-it\")\n", "model, tokenizer = load(\"mlx-community/quantized-gemma-2b-it\")\n",

View File

@ -55,7 +55,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n", "from langchain_huggingface import HuggingFacePipeline\n",
"\n", "\n",
"ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n",
"\n", "\n",

View File

@ -24,7 +24,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"%pip install --upgrade --quiet rellm > /dev/null" "%pip install --upgrade --quiet rellm langchain-huggingface > /dev/null"
] ]
}, },
{ {
@ -92,7 +92,7 @@
} }
], ],
"source": [ "source": [
"from langchain_community.llms import HuggingFacePipeline\n", "from langchain_huggingface import HuggingFacePipeline\n",
"from transformers import pipeline\n", "from transformers import pipeline\n",
"\n", "\n",
"hf_model = pipeline(\n", "hf_model = pipeline(\n",

View File

@ -85,7 +85,6 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from intel_extension_for_transformers.transformers import AutoModelForSeq2SeqLM\n", "from intel_extension_for_transformers.transformers import AutoModelForSeq2SeqLM\n",
"from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n",
"from transformers import AutoTokenizer, pipeline\n", "from transformers import AutoTokenizer, pipeline\n",
"\n", "\n",
"model_id = \"google/flan-t5-large\"\n", "model_id = \"google/flan-t5-large\"\n",

View File

@ -2,22 +2,24 @@
All functionality related to the [Hugging Face Platform](https://huggingface.co/). All functionality related to the [Hugging Face Platform](https://huggingface.co/).
## Installation
Most of the Hugging Face integrations are available in the `langchain-huggingface` package.
```bash
pip install langchain-huggingface
```
## Chat models ## Chat models
### Models from Hugging Face ### Models from Hugging Face
We can use the `Hugging Face` LLM classes or directly use the `ChatHuggingFace` class. We can use the `Hugging Face` LLM classes or directly use the `ChatHuggingFace` class.
We need to install several python packages.
```bash
pip install huggingface_hub
pip install transformers
```
See a [usage example](/docs/integrations/chat/huggingface). See a [usage example](/docs/integrations/chat/huggingface).
```python ```python
from langchain_community.chat_models.huggingface import ChatHuggingFace from langchain_huggingface import ChatHuggingFace
``` ```
## LLMs ## LLMs
@ -26,60 +28,23 @@ from langchain_community.chat_models.huggingface import ChatHuggingFace
Hugging Face models can be run locally through the `HuggingFacePipeline` class. Hugging Face models can be run locally through the `HuggingFacePipeline` class.
We need to install `transformers` python package.
```bash
pip install transformers
```
See a [usage example](/docs/integrations/llms/huggingface_pipelines). See a [usage example](/docs/integrations/llms/huggingface_pipelines).
```python ```python
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline from langchain_huggingface import HuggingFacePipeline
``` ```
To use the OpenVINO backend in local pipeline wrapper, please install the optimum library and set HuggingFacePipeline's backend as `openvino`:
```bash
pip install --upgrade-strategy eager "optimum[openvino,nncf]"
```
See a [usage example](/docs/integrations/llms/huggingface_pipelines)
To export your model to the OpenVINO IR format with the CLI:
```bash
optimum-cli export openvino --model gpt2 ov_model
```
To apply [weight-only quantization](https://github.com/huggingface/optimum-intel?tab=readme-ov-file#export) when exporting your model.
## Embedding Models ## Embedding Models
### Hugging Face Hub ### HuggingFaceEmbeddings
>The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform
> with over 350k models, 75k datasets, and 150k demo apps (Spaces), all open source
> and publicly available, in an online platform where people can easily
> collaborate and build ML together. The Hub works as a central place where anyone
> can explore, experiment, collaborate, and build technology with Machine Learning.
We need to install the `sentence_transformers` python package.
```bash
pip install sentence_transformers
```
#### HuggingFaceEmbeddings
See a [usage example](/docs/integrations/text_embedding/huggingfacehub). See a [usage example](/docs/integrations/text_embedding/huggingfacehub).
```python ```python
from langchain_community.embeddings import HuggingFaceEmbeddings from langchain_huggingface import HuggingFaceEmbeddings
``` ```
#### HuggingFaceInstructEmbeddings
### HuggingFaceInstructEmbeddings
See a [usage example](/docs/integrations/text_embedding/instruct_embeddings). See a [usage example](/docs/integrations/text_embedding/instruct_embeddings).
@ -87,7 +52,7 @@ See a [usage example](/docs/integrations/text_embedding/instruct_embeddings).
from langchain_community.embeddings import HuggingFaceInstructEmbeddings from langchain_community.embeddings import HuggingFaceInstructEmbeddings
``` ```
#### HuggingFaceBgeEmbeddings ### HuggingFaceBgeEmbeddings
>[BGE models on Hugging Face](https://huggingface.co/BAAI/bge-large-en) are [the best open-source embedding models](https://huggingface.co/spaces/mteb/leaderboard). >[BGE models on Hugging Face](https://huggingface.co/BAAI/bge-large-en) are [the best open-source embedding models](https://huggingface.co/spaces/mteb/leaderboard).
>The BGE models are created by the [Beijing Academy of Artificial Intelligence (BAAI)](https://en.wikipedia.org/wiki/Beijing_Academy_of_Artificial_Intelligence). `BAAI` is a private non-profit organization engaged in AI research and development. >The BGE models are created by the [Beijing Academy of Artificial Intelligence (BAAI)](https://en.wikipedia.org/wiki/Beijing_Academy_of_Artificial_Intelligence). `BAAI` is a private non-profit organization engaged in AI research and development.

View File

@ -17,7 +17,7 @@ pip install langchain-community sentence-transformers
``` ```
```python ```python
from langchain_community.text_embeddings import HuggingFaceEmbeddings from langchain_huggingface import HuggingFaceEmbeddings
model = HuggingFaceEmbeddings(model_name="snowflake/arctic-embed-l") model = HuggingFaceEmbeddings(model_name="snowflake/arctic-embed-l")
``` ```

View File

@ -41,7 +41,7 @@ docs = text_splitter.split_documents(documents)
from langchain_community.vectorstores import VDMS from langchain_community.vectorstores import VDMS
from langchain_community.vectorstores.vdms import VDMS_Client from langchain_community.vectorstores.vdms import VDMS_Client
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings from langchain_huggingface import HuggingFaceEmbeddings
client = VDMS_Client("localhost", 55555) client = VDMS_Client("localhost", 55555)
vectorstore = VDMS.from_documents( vectorstore = VDMS.from_documents(

View File

@ -33,7 +33,7 @@
" EmbeddingsClusteringFilter,\n", " EmbeddingsClusteringFilter,\n",
" EmbeddingsRedundantFilter,\n", " EmbeddingsRedundantFilter,\n",
")\n", ")\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n", "from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"\n", "\n",
"# Get 3 diff embeddings.\n", "# Get 3 diff embeddings.\n",

View File

@ -41,7 +41,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.embeddings import HuggingFaceEmbeddings" "from langchain_huggingface import HuggingFaceEmbeddings"
] ]
}, },
{ {

View File

@ -52,8 +52,8 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import Annoy\n", "from langchain_community.vectorstores import Annoy\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"\n", "\n",
"embeddings_func = HuggingFaceEmbeddings()" "embeddings_func = HuggingFaceEmbeddings()"
] ]

View File

@ -328,7 +328,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n", "from langchain_huggingface import HuggingFaceEmbeddings\n",
"\n", "\n",
"pkl = db.serialize_to_bytes() # serializes the faiss\n", "pkl = db.serialize_to_bytes() # serializes the faiss\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", "embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",

View File

@ -158,7 +158,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n", "from langchain_huggingface import HuggingFaceEmbeddings\n",
"\n", "\n",
"pkl = db.serialize_to_bytes() # serializes the faiss index\n", "pkl = db.serialize_to_bytes() # serializes the faiss index\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", "embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",

View File

@ -91,11 +91,11 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import oraclevs\n", "from langchain_community.vectorstores import oraclevs\n",
"from langchain_community.vectorstores.oraclevs import OracleVS\n", "from langchain_community.vectorstores.oraclevs import OracleVS\n",
"from langchain_community.vectorstores.utils import DistanceStrategy\n", "from langchain_community.vectorstores.utils import DistanceStrategy\n",
"from langchain_core.documents import Document" "from langchain_core.documents import Document\n",
"from langchain_huggingface import HuggingFaceEmbeddings"
] ]
}, },
{ {

View File

@ -60,8 +60,8 @@
], ],
"source": [ "source": [
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import ScaNN\n", "from langchain_community.vectorstores import ScaNN\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"state_of_the_union.txt\")\n", "loader = TextLoader(\"state_of_the_union.txt\")\n",

View File

@ -41,7 +41,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.embeddings import HuggingFaceEmbeddings\n", "from langchain_huggingface import HuggingFaceEmbeddings\n",
"\n", "\n",
"embeddings = HuggingFaceEmbeddings()" "embeddings = HuggingFaceEmbeddings()"
] ]

View File

@ -74,8 +74,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import SurrealDBStore\n", "from langchain_community.vectorstores import SurrealDBStore\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter" "from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },

View File

@ -44,8 +44,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import TileDB\n", "from langchain_community.vectorstores import TileDB\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"raw_documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n", "raw_documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n",

View File

@ -43,8 +43,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import Vald\n", "from langchain_community.vectorstores import Vald\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"raw_documents = TextLoader(\"state_of_the_union.txt\").load()\n", "raw_documents = TextLoader(\"state_of_the_union.txt\").load()\n",
@ -190,8 +190,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import Vald\n", "from langchain_community.vectorstores import Vald\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"raw_documents = TextLoader(\"state_of_the_union.txt\").load()\n", "raw_documents = TextLoader(\"state_of_the_union.txt\").load()\n",

View File

@ -92,9 +92,9 @@
"import time\n", "import time\n",
"\n", "\n",
"from langchain_community.document_loaders.text import TextLoader\n", "from langchain_community.document_loaders.text import TextLoader\n",
"from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import VDMS\n", "from langchain_community.vectorstores import VDMS\n",
"from langchain_community.vectorstores.vdms import VDMS_Client\n", "from langchain_community.vectorstores.vdms import VDMS_Client\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_text_splitters.character import CharacterTextSplitter\n", "from langchain_text_splitters.character import CharacterTextSplitter\n",
"\n", "\n",
"time.sleep(2)\n", "time.sleep(2)\n",

View File

@ -53,8 +53,8 @@
], ],
"source": [ "source": [
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores.vearch import Vearch\n", "from langchain_community.vectorstores.vearch import Vearch\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from transformers import AutoModel, AutoTokenizer\n", "from transformers import AutoModel, AutoTokenizer\n",
"\n", "\n",

View File

@ -19,7 +19,7 @@ class HuggingFaceEndpointEmbeddings(BaseModel, Embeddings):
Example: Example:
.. code-block:: python .. code-block:: python
from langchain_community.embeddings import HuggingFaceEndpointEmbeddings from langchain_huggingface import HuggingFaceEndpointEmbeddings
model = "sentence-transformers/all-mpnet-base-v2" model = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceEndpointEmbeddings( hf = HuggingFaceEndpointEmbeddings(
model=model, model=model,