Files
langchain/libs/partners/voyageai/tests/integration_tests/test_embeddings.py
Omri Eliyahu Levy f8883a1321 partners/voyageai: enable setting output dimension (#28740)
Voyage has introduced voyage-3-large and voyage-code-3, which feature
different output dimensions by leveraging a technique called "Matryoshka
Embeddings" (see blog -
https://blog.voyageai.com/2024/12/04/voyage-code-3/).
These two models are available in various sizes: [256, 512, 1024, 2048]
(https://docs.voyageai.com/docs/embeddings#model-choices).

This PR adds the option to set the required output dimension.
2024-12-17 10:02:00 -05:00

63 lines
2.1 KiB
Python

"""Test VoyageAI embeddings."""
from langchain_voyageai import VoyageAIEmbeddings
# Please set VOYAGE_API_KEY in the environment variables before running these tests.
# Model name shared by the tests below that do not name a model explicitly.
MODEL = "voyage-2"
def test_langchain_voyageai_embedding_documents() -> None:
    """Embed one document and expect a single 1024-dimensional vector."""
    embedder = VoyageAIEmbeddings(model=MODEL)  # type: ignore[call-arg]
    vectors = embedder.embed_documents(["foo bar"])
    # One input text should yield exactly one embedding.
    assert len(vectors) == 1
    only_vector = vectors[0]
    assert len(only_vector) == 1024
def test_langchain_voyageai_embedding_documents_multiple() -> None:
    """Embed three documents with ``batch_size=2`` and check every vector's size."""
    texts = ["foo bar", "bar foo", "foo"]
    embedder = VoyageAIEmbeddings(model=MODEL, batch_size=2)
    vectors = embedder.embed_documents(texts)
    assert len(vectors) == 3
    # The count check above guarantees this loop covers indices 0, 1 and 2.
    for vector in vectors:
        assert len(vector) == 1024
def test_langchain_voyageai_embedding_query() -> None:
    """Embed a single query string and expect a 1024-dimensional vector."""
    embedder = VoyageAIEmbeddings(model=MODEL)  # type: ignore[call-arg]
    query_vector = embedder.embed_query("foo bar")
    assert len(query_vector) == 1024
async def test_langchain_voyageai_async_embedding_documents_multiple() -> None:
    """Asynchronously embed three documents with ``batch_size=2``.

    Checks that three vectors come back and that each has 1024 entries.
    """
    texts = ["foo bar", "bar foo", "foo"]
    embedder = VoyageAIEmbeddings(model=MODEL, batch_size=2)
    vectors = await embedder.aembed_documents(texts)
    assert len(vectors) == 3
    # The count check above guarantees this loop covers indices 0, 1 and 2.
    for vector in vectors:
        assert len(vector) == 1024
async def test_langchain_voyageai_async_embedding_query() -> None:
    """Asynchronously embed one query string and expect 1024 dimensions."""
    embedder = VoyageAIEmbeddings(model=MODEL)  # type: ignore[call-arg]
    query_vector = await embedder.aembed_query("foo bar")
    assert len(query_vector) == 1024
def test_langchain_voyageai_embedding_documents_with_output_dimension() -> None:
    """Request 256-dimensional output from ``voyage-3-large`` and verify the size."""
    # Named once so the request and the assertion cannot drift apart.
    requested_dim = 256
    embedder = VoyageAIEmbeddings(  # type: ignore[call-arg]
        model="voyage-3-large",
        output_dimension=requested_dim,
    )
    vectors = embedder.embed_documents(["foo bar"])
    assert len(vectors) == 1
    assert len(vectors[0]) == requested_dim