mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-31 18:38:48 +00:00
community[minor]: Add support for model2vec embeddings (#28507)
This PR adds an embeddings integration for model2vec, the `Model2vecEmbeddings` class. - **Description**: [Model2Vec](https://github.com/MinishLab/model2vec) lets you turn any sentence transformer into a really small static model and makes running the model faster. - **Issue**: - **Dependencies**: model2vec ([pypi](https://pypi.org/project/model2vec/)) - **Twitter handle:**: - [x] **Add tests and docs**: - [Test](https://github.com/blacksmithop/langchain/blob/model2vec_embeddings/libs/community/langchain_community/embeddings/model2vec.py), [docs](https://github.com/blacksmithop/langchain/blob/model2vec_embeddings/docs/docs/integrations/text_embedding/model2vec.ipynb) - [x] **Lint and test**: --------- Co-authored-by: Abhinav KM <abhinav.m@zerone-consulting.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -145,6 +145,9 @@ if TYPE_CHECKING:
|
||||
from langchain_community.embeddings.mlflow_gateway import (
|
||||
MlflowAIGatewayEmbeddings,
|
||||
)
|
||||
from langchain_community.embeddings.model2vec import (
|
||||
Model2vecEmbeddings,
|
||||
)
|
||||
from langchain_community.embeddings.modelscope_hub import (
|
||||
ModelScopeEmbeddings,
|
||||
)
|
||||
@@ -289,6 +292,7 @@ __all__ = [
|
||||
"MlflowAIGatewayEmbeddings",
|
||||
"MlflowCohereEmbeddings",
|
||||
"MlflowEmbeddings",
|
||||
"Model2vecEmbeddings",
|
||||
"ModelScopeEmbeddings",
|
||||
"MosaicMLInstructorEmbeddings",
|
||||
"NLPCloudEmbeddings",
|
||||
@@ -372,6 +376,7 @@ _module_lookup = {
|
||||
"MlflowAIGatewayEmbeddings": "langchain_community.embeddings.mlflow_gateway",
|
||||
"MlflowCohereEmbeddings": "langchain_community.embeddings.mlflow",
|
||||
"MlflowEmbeddings": "langchain_community.embeddings.mlflow",
|
||||
"Model2vecEmbeddings": "langchain_community.embeddings.model2vec",
|
||||
"ModelScopeEmbeddings": "langchain_community.embeddings.modelscope_hub",
|
||||
"MosaicMLInstructorEmbeddings": "langchain_community.embeddings.mosaicml",
|
||||
"NLPCloudEmbeddings": "langchain_community.embeddings.nlpcloud",
|
||||
|
66
libs/community/langchain_community/embeddings/model2vec.py
Normal file
66
libs/community/langchain_community/embeddings/model2vec.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Wrapper around model2vec embedding models."""
|
||||
|
||||
from typing import List
|
||||
|
||||
from langchain_core.embeddings import Embeddings
|
||||
|
||||
|
||||
class Model2vecEmbeddings(Embeddings):
    """Model2Vec embedding models.

    Install model2vec first, run 'pip install -U model2vec'.
    The github repository for model2vec is : https://github.com/MinishLab/model2vec

    Example:
        .. code-block:: python

            from langchain_community.embeddings import Model2vecEmbeddings

            embedding = Model2vecEmbeddings("minishlab/potion-base-8M")
            embedding.embed_documents([
                "It's dangerous to go alone!",
                "It's a secret to everybody.",
            ])
            embedding.embed_query(
                "Take this with you."
            )
    """

    def __init__(self, model: str):
        """Initialize embeddings.

        Args:
            model: Model name, passed to ``StaticModel.from_pretrained``.

        Raises:
            ImportError: If the ``model2vec`` package is not installed.
        """
        # Imported lazily so that importing this module does not require the
        # optional ``model2vec`` dependency.
        try:
            from model2vec import StaticModel
        except ImportError as e:
            raise ImportError(
                "Unable to import model2vec, please install with "
                "`pip install -U model2vec`."
            ) from e
        self._model = StaticModel.from_pretrained(model)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents using the model2vec embeddings model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        # Nothing to encode; avoid handing an empty batch to the model.
        if not texts:
            return []
        # ``encode`` produces one pooled vector per text. ``encode_as_sequence``
        # would instead produce per-token vectors for each text, which does not
        # match the one-embedding-per-document contract of this method.
        # ``tolist`` converts the array result into plain Python lists so the
        # declared return type holds.
        return self._model.encode(texts).tolist()

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using the model2vec embeddings model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        # Convert the array result into a plain list of floats so the declared
        # return type holds.
        return self._model.encode(text).tolist()
|
@@ -26,6 +26,7 @@ EXPECTED_ALL = [
|
||||
"MlflowAIGatewayEmbeddings",
|
||||
"MlflowEmbeddings",
|
||||
"MlflowCohereEmbeddings",
|
||||
"Model2vecEmbeddings",
|
||||
"ModelScopeEmbeddings",
|
||||
"TensorflowHubEmbeddings",
|
||||
"SagemakerEndpointEmbeddings",
|
||||
|
11
libs/community/tests/unit_tests/embeddings/test_model2vec.py
Normal file
11
libs/community/tests/unit_tests/embeddings/test_model2vec.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from langchain_community.embeddings.model2vec import Model2vecEmbeddings
|
||||
|
||||
|
||||
def test_hugginggface_inferenceapi_embedding_documents_init() -> None:
    """Test model2vec embeddings.

    Builds a ``Model2vecEmbeddings`` for ``minishlab/potion-base-8M`` and checks
    that a query embedding has 256 dimensions. When the optional ``model2vec``
    package is not installed the check is skipped.
    """
    try:
        embedding = Model2vecEmbeddings("minishlab/potion-base-8M")
    except ImportError:
        # model2vec is not installed
        return

    # Kept outside the ``try`` so that a wrong result actually fails the test
    # instead of being swallowed by the exception handler.
    assert len(embedding.embed_query("hi")) == 256
|
Reference in New Issue
Block a user