community[minor]: Add support for model2vec embeddings (#28507)

This PR adds an embeddings integration for model2vec: the
`Model2vecEmbeddings` class.

- **Description**: [Model2Vec](https://github.com/MinishLab/model2vec)
lets you turn any sentence transformer into a really small static model
and makes running the model faster.
- **Issue**:
- **Dependencies**: model2vec
([pypi](https://pypi.org/project/model2vec/))
- **Twitter handle:**:

- [x] **Add tests and docs**: 
-
[Test](https://github.com/blacksmithop/langchain/blob/model2vec_embeddings/libs/community/langchain_community/embeddings/model2vec.py),
[docs](https://github.com/blacksmithop/langchain/blob/model2vec_embeddings/docs/docs/integrations/text_embedding/model2vec.ipynb)

- [x] **Lint and test**:

---------

Co-authored-by: Abhinav KM <abhinav.m@zerone-consulting.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Abhinav
2024-12-09 07:47:22 +05:30
committed by GitHub
parent fbf0704e48
commit 317a38b83e
5 changed files with 284 additions and 0 deletions

View File

@@ -145,6 +145,9 @@ if TYPE_CHECKING:
from langchain_community.embeddings.mlflow_gateway import (
MlflowAIGatewayEmbeddings,
)
from langchain_community.embeddings.model2vec import (
Model2vecEmbeddings,
)
from langchain_community.embeddings.modelscope_hub import (
ModelScopeEmbeddings,
)
@@ -289,6 +292,7 @@ __all__ = [
"MlflowAIGatewayEmbeddings",
"MlflowCohereEmbeddings",
"MlflowEmbeddings",
"Model2vecEmbeddings",
"ModelScopeEmbeddings",
"MosaicMLInstructorEmbeddings",
"NLPCloudEmbeddings",
@@ -372,6 +376,7 @@ _module_lookup = {
"MlflowAIGatewayEmbeddings": "langchain_community.embeddings.mlflow_gateway",
"MlflowCohereEmbeddings": "langchain_community.embeddings.mlflow",
"MlflowEmbeddings": "langchain_community.embeddings.mlflow",
"Model2vecEmbeddings": "langchain_community.embeddings.model2vec",
"ModelScopeEmbeddings": "langchain_community.embeddings.modelscope_hub",
"MosaicMLInstructorEmbeddings": "langchain_community.embeddings.mosaicml",
"NLPCloudEmbeddings": "langchain_community.embeddings.nlpcloud",

View File

@@ -0,0 +1,66 @@
"""Wrapper around model2vec embedding models."""
from typing import List
from langchain_core.embeddings import Embeddings
class Model2vecEmbeddings(Embeddings):
    """model2vec embedding models.

    Install model2vec first, run 'pip install -U model2vec'.
    The github repository for model2vec is : https://github.com/MinishLab/model2vec

    Example:
        .. code-block:: python

            from langchain_community.embeddings import Model2vecEmbeddings

            embedding = Model2vecEmbeddings("minishlab/potion-base-8M")
            embedding.embed_documents([
                "It's dangerous to go alone!",
                "It's a secret to everybody.",
            ])
            embedding.embed_query(
                "Take this with you."
            )
    """

    def __init__(self, model: str):
        """Initialize embeddings.

        Args:
            model: Model name, passed to ``StaticModel.from_pretrained``.

        Raises:
            ImportError: If the ``model2vec`` package is not installed.
        """
        # Imported lazily so the module can be imported without model2vec.
        try:
            from model2vec import StaticModel
        except ImportError as e:
            raise ImportError(
                "Unable to import model2vec, please install with "
                "`pip install -U model2vec`."
            ) from e

        self._model = StaticModel.from_pretrained(model)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents using the model2vec embeddings model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        # Nothing to embed; avoid handing an empty batch to the model.
        if not texts:
            return []
        # encode() produces one pooled vector per text. encode_as_sequence()
        # would instead produce per-token vectors for every text, which does
        # not match the "one embedding per document" contract.
        # tolist() converts the array result into plain Python floats.
        return self._model.encode(list(texts)).tolist()

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using the model2vec embeddings model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        # tolist() converts the array result into a plain list of floats.
        return self._model.encode(text).tolist()

View File

@@ -26,6 +26,7 @@ EXPECTED_ALL = [
"MlflowAIGatewayEmbeddings",
"MlflowEmbeddings",
"MlflowCohereEmbeddings",
"Model2vecEmbeddings",
"ModelScopeEmbeddings",
"TensorflowHubEmbeddings",
"SagemakerEndpointEmbeddings",

View File

@@ -0,0 +1,11 @@
from langchain_community.embeddings.model2vec import Model2vecEmbeddings
def test_hugginggface_inferenceapi_embedding_documents_init() -> None:
    """Test model2vec embeddings.

    Returns early without asserting when the optional ``model2vec``
    dependency is not installed.
    """
    try:
        embedding = Model2vecEmbeddings("minishlab/potion-base-8M")
    except ImportError:
        # model2vec is not installed
        return
    # Kept outside the ``try`` so a wrong embedding size fails the test
    # instead of being swallowed by the exception handler.
    assert len(embedding.embed_query("hi")) == 256