mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-02 11:39:18 +00:00
community: Fix FastEmbedEmbeddings (#24462)
## Description

This PR:

- Fixes the validation error in `FastEmbedEmbeddings`.
- Adds support for the `batch_size` and `parallel` params.
- Removes support for very old FastEmbed versions.
- Updates the FastEmbed doc with the new params.

Associated issues:

- Resolves #24039
- Resolves https://github.com/qdrant/fastembed/issues/296
This commit is contained in:
@@ -11,8 +11,9 @@ from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
|
||||
@pytest.mark.parametrize("max_length", [50, 512])
|
||||
@pytest.mark.parametrize("doc_embed_type", ["default", "passage"])
|
||||
@pytest.mark.parametrize("threads", [0, 10])
|
||||
@pytest.mark.parametrize("batch_size", [1, 10])
|
||||
def test_fastembed_embedding_documents(
|
||||
model_name: str, max_length: int, doc_embed_type: str, threads: int
|
||||
model_name: str, max_length: int, doc_embed_type: str, threads: int, batch_size: int
|
||||
) -> None:
|
||||
"""Test fastembed embeddings for documents."""
|
||||
documents = ["foo bar", "bar foo"]
|
||||
@@ -21,6 +22,7 @@ def test_fastembed_embedding_documents(
|
||||
max_length=max_length,
|
||||
doc_embed_type=doc_embed_type, # type: ignore[arg-type]
|
||||
threads=threads,
|
||||
batch_size=batch_size,
|
||||
)
|
||||
output = embedding.embed_documents(documents)
|
||||
assert len(output) == 2
|
||||
@@ -31,10 +33,15 @@ def test_fastembed_embedding_documents(
|
||||
"model_name", ["sentence-transformers/all-MiniLM-L6-v2", "BAAI/bge-small-en-v1.5"]
|
||||
)
|
||||
@pytest.mark.parametrize("max_length", [50, 512])
|
||||
def test_fastembed_embedding_query(model_name: str, max_length: int) -> None:
|
||||
@pytest.mark.parametrize("batch_size", [1, 10])
|
||||
def test_fastembed_embedding_query(
|
||||
model_name: str, max_length: int, batch_size: int
|
||||
) -> None:
|
||||
"""Test fastembed embeddings for query."""
|
||||
document = "foo bar"
|
||||
embedding = FastEmbedEmbeddings(model_name=model_name, max_length=max_length) # type: ignore[call-arg]
|
||||
embedding = FastEmbedEmbeddings(
|
||||
model_name=model_name, max_length=max_length, batch_size=batch_size
|
||||
) # type: ignore[call-arg]
|
||||
output = embedding.embed_query(document)
|
||||
assert len(output) == 384
|
||||
|
||||
|
Reference in New Issue
Block a user