mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 14:18:52 +00:00
community: fixed bug in model2vec embedding code (#28670)
This PR fixes a bug in the current Model2Vec embeddings implementation where `embed_documents` does not work as expected. **Description**: the current implementation uses `encode_as_sequence` to encode documents. This is incorrect, because `encode_as_sequence` creates token embeddings rather than mean embeddings. The normal `encode` function handles both single and batched inputs and should be used instead. The return type was also incorrect, because `encode` returns a NumPy array; this PR converts the embeddings to a list so that the output is consistent with the `Embeddings` ABC.
This commit is contained in:
parent
b20230c800
commit
ee640d6bd3
@ -6,7 +6,7 @@ from langchain_core.embeddings import Embeddings
|
|||||||
|
|
||||||
|
|
||||||
class Model2vecEmbeddings(Embeddings):
|
class Model2vecEmbeddings(Embeddings):
|
||||||
"""model2v embedding models.
|
"""Model2Vec embedding models.
|
||||||
|
|
||||||
Install model2vec first, run 'pip install -U model2vec'.
|
Install model2vec first, run 'pip install -U model2vec'.
|
||||||
The github repository for model2vec is : https://github.com/MinishLab/model2vec
|
The github repository for model2vec is : https://github.com/MinishLab/model2vec
|
||||||
@ -51,7 +51,7 @@ class Model2vecEmbeddings(Embeddings):
|
|||||||
List of embeddings, one for each text.
|
List of embeddings, one for each text.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return self._model.encode_as_sequence(texts)
|
return self._model.encode(texts).tolist()
|
||||||
|
|
||||||
def embed_query(self, text: str) -> List[float]:
|
def embed_query(self, text: str) -> List[float]:
|
||||||
"""Embed a query using the model2vec embeddings model.
|
"""Embed a query using the model2vec embeddings model.
|
||||||
@ -63,4 +63,4 @@ class Model2vecEmbeddings(Embeddings):
|
|||||||
Embeddings for the text.
|
Embeddings for the text.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return self._model.encode(text)
|
return self._model.encode(text).tolist()
|
||||||
|
Loading…
Reference in New Issue
Block a user