voyageai: remove from monorepo (#31281)

langchain-voyageai is now maintained at
https://github.com/voyage-ai/langchain-voyageai.
This commit is contained in:
ccurme 2025-05-19 12:33:38 -04:00 committed by GitHub
parent 49fbcec34f
commit bf645c83f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 4 additions and 2450 deletions

View File

@ -37,7 +37,6 @@ IGNORED_PARTNERS = [
]
PY_312_MAX_PACKAGES = [
"libs/partners/voyageai",
"libs/partners/chroma", # https://github.com/chroma-core/chroma/issues/4382
]

View File

@ -67,7 +67,6 @@ jobs:
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}

View File

@ -322,7 +322,6 @@ jobs:
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}

View File

@ -103,12 +103,6 @@ repos:
entry: make -C libs/partners/qdrant format
files: ^libs/partners/qdrant/
pass_filenames: false
- id: voyageai
name: format partners/voyageai
language: system
entry: make -C libs/partners/voyageai format
files: ^libs/partners/voyageai/
pass_filenames: false
- id: root
name: format docs, cookbook
language: system

View File

@ -152,8 +152,8 @@ packages:
downloads: 20558
downloads_updated_at: '2025-05-08T20:26:05.985970+00:00'
- name: langchain-voyageai
path: libs/partners/voyageai
repo: langchain-ai/langchain
path: libs/voyageai
repo: voyage-ai/langchain-voyageai
downloads: 27698
downloads_updated_at: '2025-05-08T20:26:05.985970+00:00'
- name: langchain-aws

View File

@ -1 +0,0 @@
__pycache__

View File

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2024 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,65 +0,0 @@
# Every target in this file is a command, not a file, so all of them are phony.
.PHONY: all help test tests integration_test integration_tests test_watch \
        lint lint_diff lint_package lint_tests format format_diff \
        spell_check spell_fix check_imports

# Default target executed when no arguments are given to make.
all: help

# Export every make variable (notably UV_FROZEN) to the recipe environment.
.EXPORT_ALL_VARIABLES:
UV_FROZEN = true

# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/

integration_test integration_tests: TEST_FILE=tests/integration_tests/

# Unit tests run with network sockets disabled.
test tests:
	uv run --group test pytest --disable-socket --allow-unix-socket $(TEST_FILE)

integration_test integration_tests:
	uv run --group test --group test_integration pytest $(TEST_FILE)

test_watch:
	uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE)

######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
# Recursive (=) assignment on purpose: `git diff` only runs when a *_diff
# target actually expands PYTHON_FILES, not on every make invocation.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/voyageai --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_voyageai
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

# Each step is skipped when PYTHON_FILES is empty (e.g. lint_diff with no
# changed files). The mypy step is grouped in braces because the shell parses
# `A || B && C` as `(A || B) && C`, which would run mypy with no files.
lint lint_diff lint_package lint_tests:
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || { mkdir -p $(MYPY_CACHE) && uv run --all-groups mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); }

# `ruff check` is spelled out explicitly; the implicit `ruff <path>` form is
# not accepted by newer ruff releases.
format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run --group lint ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run --group lint ruff check --select I --fix $(PYTHON_FILES)

spell_check:
	uv run --all-groups codespell --toml pyproject.toml

spell_fix:
	uv run --all-groups codespell --toml pyproject.toml -w

# The package's .py files are the prerequisites, so $^ passes them all to the script.
check_imports: $(shell find langchain_voyageai -name '*.py')
	uv run --all-groups python ./scripts/check_imports.py $^

######################
# HELP
######################

help:
	@echo '----'
	@echo 'check_imports				- check imports'
	@echo 'format                       - run code formatters'
	@echo 'lint                         - run linters'
	@echo 'test                         - run unit tests'
	@echo 'tests                        - run unit tests'
	@echo 'test TEST_FILE=<test_file>   - run all tests in file'

View File

@ -1,21 +1,3 @@
# langchain-voyageai
This package has moved!
This package contains the LangChain integrations for VoyageAI through their `voyageai` client package.
## Installation and Setup
- Install the LangChain partner package
```bash
pip install langchain-voyageai
```
- Get a VoyageAI API key and set it as an environment variable (`VOYAGE_API_KEY`), or pass the API key as a parameter to the client.
## Text Embedding Model
See a [usage example](https://python.langchain.com/docs/integrations/text_embedding/voyageai)
```python
from langchain_voyageai import VoyageAIEmbeddings
```
https://github.com/voyage-ai/langchain-voyageai

View File

@ -1,4 +0,0 @@
from langchain_voyageai.embeddings import VoyageAIEmbeddings
from langchain_voyageai.rerank import VoyageAIRerank
__all__ = ["VoyageAIEmbeddings", "VoyageAIRerank"]

View File

@ -1,156 +0,0 @@
import logging
from collections.abc import Iterable
from typing import Any, Literal, Optional, cast
import voyageai # type: ignore
from langchain_core.embeddings import Embeddings
from langchain_core.utils import secret_from_env
from pydantic import (
BaseModel,
ConfigDict,
Field,
PrivateAttr,
SecretStr,
model_validator,
)
from typing_extensions import Self
# Module-level logger.
logger = logging.getLogger(__name__)
# Default batch sizes, chosen by model name in
# `VoyageAIEmbeddings.default_values` when the caller gives no `batch_size`.
DEFAULT_VOYAGE_2_BATCH_SIZE = 72  # "voyage-2" / "voyage-02"
DEFAULT_VOYAGE_3_LITE_BATCH_SIZE = 30  # "voyage-3-lite"
DEFAULT_VOYAGE_3_BATCH_SIZE = 10  # "voyage-3"
DEFAULT_BATCH_SIZE = 7  # any other model
class VoyageAIEmbeddings(BaseModel, Embeddings):
    """Embedding model backed by the `voyageai` client.

    Example:
        .. code-block:: python

            from langchain_voyageai import VoyageAIEmbeddings

            model = VoyageAIEmbeddings(model="voyage-2")
    """

    # Sync and async clients, created in `validate_environment`.
    _client: voyageai.Client = PrivateAttr()
    _aclient: voyageai.client_async.AsyncClient = PrivateAttr()
    # Name of the model passed to every `embed` call.
    model: str
    # Number of texts sent per request; defaulted per model in `default_values`.
    batch_size: int
    output_dimension: Optional[Literal[256, 512, 1024, 2048]] = None
    # When True, batches are iterated through a tqdm progress bar.
    show_progress_bar: bool = False
    truncation: bool = True
    # Accepted as `api_key` or `voyage_api_key`; otherwise read from the
    # `VOYAGE_API_KEY` environment variable.
    voyage_api_key: SecretStr = Field(
        alias="api_key",
        default_factory=secret_from_env(
            "VOYAGE_API_KEY",
            error_message="Must set `VOYAGE_API_KEY` environment variable or "
            "pass `api_key` to VoyageAIEmbeddings constructor.",
        ),
    )

    model_config = ConfigDict(
        extra="forbid",
        populate_by_name=True,
    )

    @model_validator(mode="before")
    @classmethod
    def default_values(cls, values: dict) -> Any:
        """Fill in `batch_size` from the model name when it was not given."""
        if values.get("batch_size") is not None:
            return values

        model_name = values.get("model")
        if model_name in ["voyage-2", "voyage-02"]:
            chosen = DEFAULT_VOYAGE_2_BATCH_SIZE
        elif model_name == "voyage-3-lite":
            chosen = DEFAULT_VOYAGE_3_LITE_BATCH_SIZE
        elif model_name == "voyage-3":
            chosen = DEFAULT_VOYAGE_3_BATCH_SIZE
        else:
            chosen = DEFAULT_BATCH_SIZE
        values["batch_size"] = chosen
        return values

    @model_validator(mode="after")
    def validate_environment(self) -> Self:
        """Build the sync and async VoyageAI clients from the API key."""
        secret = self.voyage_api_key.get_secret_value()
        self._client = voyageai.Client(api_key=secret)
        self._aclient = voyageai.client_async.AsyncClient(api_key=secret)
        return self

    def _get_batch_iterator(self, texts: list[str]) -> Iterable:
        """Return the start offsets of each batch, optionally wrapped in tqdm."""
        if self.show_progress_bar:
            try:
                from tqdm.auto import tqdm  # type: ignore
            except ImportError as e:
                raise ImportError(
                    "Must have tqdm installed if `show_progress_bar` is set to True. "
                    "Please install with `pip install tqdm`."
                ) from e

            return tqdm(range(0, len(texts), self.batch_size))

        return range(0, len(texts), self.batch_size)  # type: ignore

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed search docs, one request per batch of `batch_size` texts."""
        vectors: list[list[float]] = []

        for start in self._get_batch_iterator(texts):
            batch = texts[start : start + self.batch_size]
            response = self._client.embed(
                batch,
                model=self.model,
                input_type="document",
                truncation=self.truncation,
                output_dimension=self.output_dimension,
            )
            vectors.extend(cast(Iterable[list[float]], response.embeddings))

        return vectors

    def embed_query(self, text: str) -> list[float]:
        """Embed a single query text."""
        response = self._client.embed(
            [text],
            model=self.model,
            input_type="query",
            truncation=self.truncation,
            output_dimension=self.output_dimension,
        )
        return cast(list[float], response.embeddings[0])

    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
        """Async counterpart of `embed_documents`."""
        vectors: list[list[float]] = []

        for start in self._get_batch_iterator(texts):
            batch = texts[start : start + self.batch_size]
            response = await self._aclient.embed(
                batch,
                model=self.model,
                input_type="document",
                truncation=self.truncation,
                output_dimension=self.output_dimension,
            )
            vectors.extend(cast(Iterable[list[float]], response.embeddings))

        return vectors

    async def aembed_query(self, text: str) -> list[float]:
        """Async counterpart of `embed_query`."""
        response = await self._aclient.embed(
            [text],
            model=self.model,
            input_type="query",
            truncation=self.truncation,
            output_dimension=self.output_dimension,
        )
        return cast(list[float], response.embeddings[0])

View File

@ -1,156 +0,0 @@
from __future__ import annotations
import os
from collections.abc import Sequence
from copy import deepcopy
from typing import Any, Optional, Union
import voyageai # type: ignore
from langchain_core.callbacks.manager import Callbacks
from langchain_core.documents import Document
from langchain_core.documents.compressor import BaseDocumentCompressor
from langchain_core.utils import convert_to_secret_str
from pydantic import ConfigDict, SecretStr, model_validator
from voyageai.object import RerankingObject # type: ignore
class VoyageAIRerank(BaseDocumentCompressor):
    """Document compressor that uses `VoyageAI Rerank API`."""

    client: voyageai.Client = None  # type: ignore
    aclient: voyageai.AsyncClient = None  # type: ignore
    """VoyageAI clients to use for compressing documents."""
    voyage_api_key: Optional[SecretStr] = None
    """VoyageAI API key. Must be specified directly or via environment variable
        VOYAGE_API_KEY."""
    model: str
    """Model to use for reranking."""
    top_k: Optional[int] = None
    """Number of documents to return."""
    truncation: bool = True

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
    )

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: dict) -> Any:
        """Resolve the API key and create the sync and async clients.

        The key comes from `voyage_api_key` in `values`, falling back to the
        `VOYAGE_API_KEY` environment variable. When neither is set the clients
        are created with `api_key=None`.
        """
        raw_key = values.get("voyage_api_key") or os.getenv("VOYAGE_API_KEY", None)

        api_key_str = None
        if raw_key:
            secret = convert_to_secret_str(raw_key)
            values["voyage_api_key"] = secret
            api_key_str = secret.get_secret_value()

        values["client"] = voyageai.Client(api_key=api_key_str)
        values["aclient"] = voyageai.AsyncClient(api_key=api_key_str)
        return values

    def _rerank(
        self,
        documents: Sequence[Union[str, Document]],
        query: str,
    ) -> RerankingObject:
        """Rerank `documents` against `query` with the sync client.

        Args:
            query: The query to use for reranking.
            documents: A sequence of documents to rerank.
        """
        # The client takes plain strings, so unwrap Document instances.
        texts = [
            item.page_content if isinstance(item, Document) else item
            for item in documents
        ]
        return self.client.rerank(
            query=query,
            documents=texts,
            model=self.model,
            top_k=self.top_k,
            truncation=self.truncation,
        )

    async def _arerank(
        self,
        documents: Sequence[Union[str, Document]],
        query: str,
    ) -> RerankingObject:
        """Rerank `documents` against `query` with the async client.

        Args:
            query: The query to use for reranking.
            documents: A sequence of documents to rerank.
        """
        # The client takes plain strings, so unwrap Document instances.
        texts = [
            item.page_content if isinstance(item, Document) else item
            for item in documents
        ]
        return await self.aclient.rerank(
            query=query,
            documents=texts,
            model=self.model,
            top_k=self.top_k,
            truncation=self.truncation,
        )

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """
        Compress documents using VoyageAI's rerank API.

        Args:
            documents: A sequence of documents to compress.
            query: The query to use for compressing the documents.
            callbacks: Callbacks to run during the compression process.

        Returns:
            A sequence of compressed documents in relevance_score order.
        """
        # Skip the API call entirely for empty input.
        if len(documents) == 0:
            return []

        ranked = self._rerank(documents, query).results
        compressed = []
        for hit in ranked:
            source = documents[hit.index]
            # Copy so the caller's documents and metadata are left untouched.
            scored = Document(source.page_content, metadata=deepcopy(source.metadata))
            scored.metadata["relevance_score"] = hit.relevance_score
            compressed.append(scored)
        return compressed

    async def acompress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """
        Compress documents using VoyageAI's rerank API (async).

        Args:
            documents: A sequence of documents to compress.
            query: The query to use for compressing the documents.
            callbacks: Callbacks to run during the compression process.

        Returns:
            A sequence of compressed documents in relevance_score order.
        """
        # Skip the API call entirely for empty input.
        if len(documents) == 0:
            return []

        reranked = await self._arerank(documents, query)
        compressed = []
        for hit in reranked.results:
            source = documents[hit.index]
            # Copy so the caller's documents and metadata are left untouched.
            scored = Document(source.page_content, metadata=deepcopy(source.metadata))
            scored.metadata["relevance_score"] = hit.relevance_score
            compressed.append(scored)
        return compressed

View File

@ -1,66 +0,0 @@
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
[tool.pdm.build]
includes = []
[project]
authors = []
license = { text = "MIT" }
requires-python = "<3.13,>=3.9"
dependencies = [
"langchain-core<1.0.0,>=0.3.29",
"voyageai<1,>=0.3.2",
"pydantic<3,>=2",
]
name = "langchain-voyageai"
version = "0.1.4"
description = "An integration package connecting VoyageAI and LangChain"
readme = "README.md"
[project.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/voyageai"
"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-voyageai%3D%3D0%22&expanded=true"
repository = "https://github.com/langchain-ai/langchain"
[dependency-groups]
test = [
"pytest<8.0.0,>=7.3.0",
"freezegun<2.0.0,>=1.2.2",
"pytest-mock<4.0.0,>=3.10.0",
"syrupy<5.0.0,>=4.0.2",
"pytest-watcher<1.0.0,>=0.3.4",
"pytest-asyncio<1.0.0,>=0.21.1",
"pytest-socket<1.0.0,>=0.7.0",
"langchain-core",
]
codespell = ["codespell<3.0.0,>=2.2.0"]
test_integration = []
lint = ["ruff<1.0.0,>=0.1.5"]
dev = ["langchain-core"]
typing = ["mypy<2.0,>=1.10", "langchain-core"]
[tool.uv.sources]
langchain-core = { path = "../../core", editable = true }
[tool.mypy]
disallow_untyped_defs = "True"
[tool.ruff]
target-version = "py39"
[tool.ruff.lint]
select = ["E", "F", "I", "UP"]
ignore = [ "UP007", ]
[tool.coverage.run]
omit = ["tests/*"]
[tool.pytest.ini_options]
addopts = "--strict-markers --strict-config --durations=5"
markers = [
"requires: mark tests as requiring a specific library",
"compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"

View File

@ -1,17 +0,0 @@
import sys
import traceback
from importlib.machinery import SourceFileLoader
if __name__ == "__main__":
    # Import every file named on the command line; print the path and
    # traceback of each one that fails, and exit non-zero if any did.
    failed = False
    for path in sys.argv[1:]:
        try:
            SourceFileLoader("x", path).load_module()
        except Exception:
            failed = True
            print(path)
            traceback.print_exc()
            print()

    sys.exit(int(failed))

View File

@ -1,17 +0,0 @@
#!/bin/bash
# Fail when tracked files import from `langchain` or `langchain_experimental`.
set -eu

# Number of forbidden import patterns that matched at least once.
violations=0

# `git grep` exits 0 only when it finds a match (and prints it).
if git --no-pager grep '^from langchain\.' .; then
    violations=$((violations+1))
fi
if git --no-pager grep '^from langchain_experimental\.' .; then
    violations=$((violations+1))
fi

# Exit 1 if anything matched, 0 otherwise.
if [ "$violations" -gt 0 ]; then
    exit 1
fi
exit 0

View File

@ -1,7 +0,0 @@
import pytest
@pytest.mark.compile
def test_placeholder() -> None:
    """Placeholder so integration tests can be compiled without running real tests."""

View File

@ -1,62 +0,0 @@
"""Test VoyageAI embeddings."""
from langchain_voyageai import VoyageAIEmbeddings
# Please set VOYAGE_API_KEY in the environment variables
MODEL = "voyage-2"
def test_langchain_voyageai_embedding_documents() -> None:
    """Embedding one document yields one 1024-dimensional vector."""
    embedder = VoyageAIEmbeddings(model=MODEL)  # type: ignore[call-arg]
    vectors = embedder.embed_documents(["foo bar"])
    assert len(vectors) == 1
    assert len(vectors[0]) == 1024
def test_langchain_voyageai_embedding_documents_multiple() -> None:
    """Three documents split over batches of two yield three 1024-d vectors."""
    texts = ["foo bar", "bar foo", "foo"]
    embedder = VoyageAIEmbeddings(model=MODEL, batch_size=2)
    vectors = embedder.embed_documents(texts)
    assert len(vectors) == 3
    for position in range(3):
        assert len(vectors[position]) == 1024
def test_langchain_voyageai_embedding_query() -> None:
    """Embedding a query yields a 1024-dimensional vector."""
    embedder = VoyageAIEmbeddings(model=MODEL)  # type: ignore[call-arg]
    vector = embedder.embed_query("foo bar")
    assert len(vector) == 1024
async def test_langchain_voyageai_async_embedding_documents_multiple() -> None:
    """Async: three documents in batches of two yield three 1024-d vectors."""
    texts = ["foo bar", "bar foo", "foo"]
    embedder = VoyageAIEmbeddings(model=MODEL, batch_size=2)
    vectors = await embedder.aembed_documents(texts)
    assert len(vectors) == 3
    for position in range(3):
        assert len(vectors[position]) == 1024
async def test_langchain_voyageai_async_embedding_query() -> None:
    """Async: embedding a query yields a 1024-dimensional vector."""
    embedder = VoyageAIEmbeddings(model=MODEL)  # type: ignore[call-arg]
    vector = await embedder.aembed_query("foo bar")
    assert len(vector) == 1024
def test_langchain_voyageai_embedding_documents_with_output_dimension() -> None:
    """`output_dimension=256` yields 256-dimensional vectors."""
    embedder = VoyageAIEmbeddings(model="voyage-3-large", output_dimension=256)  # type: ignore[call-arg]
    vectors = embedder.embed_documents(["foo bar"])
    assert len(vectors) == 1
    assert len(vectors[0]) == 256

View File

@ -1,68 +0,0 @@
"""Test the voyageai reranker."""
import os
from langchain_core.documents import Document
from langchain_voyageai.rerank import VoyageAIRerank
def test_voyageai_reranker_init() -> None:
    """The reranker can be constructed from an API key and a model name."""
    VoyageAIRerank(
        voyage_api_key="foo",  # type: ignore[arg-type]
        model="foo",
    )
def test_sync() -> None:
    """Sync reranking returns as many documents as were passed in."""
    reranker = VoyageAIRerank(
        voyage_api_key=os.environ["VOYAGE_API_KEY"],  # type: ignore[arg-type]
        model="rerank-lite-1",
    )
    doc_list = [
        "The Mediterranean diet emphasizes fish, olive oil, and vegetables"
        ", believed to reduce chronic diseases.",
        "Photosynthesis in plants converts light energy into glucose and "
        "produces essential oxygen.",
        "20th-century innovations, from radios to smartphones, centered "
        "on electronic advancements.",
        "Rivers provide water, irrigation, and habitat for aquatic species, "
        "vital for ecosystems.",
        "Apples conference call to discuss fourth fiscal quarter results and "
        "business updates is scheduled for Thursday, November 2, 2023 at 2:00 "
        "p.m. PT / 5:00 p.m. ET.",
        "Shakespeare's works, like 'Hamlet' and 'A Midsummer Night's Dream,' "
        "endure in literature.",
    ]
    wrapped = [Document(page_content=text) for text in doc_list]

    reranked = reranker.compress_documents(
        query="When is the Apple's conference call scheduled?", documents=wrapped
    )
    assert len(doc_list) == len(reranked)
async def test_async() -> None:
    """Async reranking returns as many documents as were passed in."""
    reranker = VoyageAIRerank(
        voyage_api_key=os.environ["VOYAGE_API_KEY"],  # type: ignore[arg-type]
        model="rerank-lite-1",
    )
    doc_list = [
        "The Mediterranean diet emphasizes fish, olive oil, and vegetables"
        ", believed to reduce chronic diseases.",
        "Photosynthesis in plants converts light energy into glucose and "
        "produces essential oxygen.",
        "20th-century innovations, from radios to smartphones, centered "
        "on electronic advancements.",
        "Rivers provide water, irrigation, and habitat for aquatic species, "
        "vital for ecosystems.",
        "Apples conference call to discuss fourth fiscal quarter results and "
        "business updates is scheduled for Thursday, November 2, 2023 at 2:00 "
        "p.m. PT / 5:00 p.m. ET.",
        "Shakespeare's works, like 'Hamlet' and 'A Midsummer Night's Dream,' "
        "endure in literature.",
    ]
    wrapped = [Document(page_content=text) for text in doc_list]

    reranked = await reranker.acompress_documents(
        query="When is the Apple's conference call scheduled?", documents=wrapped
    )
    assert len(doc_list) == len(reranked)

View File

@ -1,61 +0,0 @@
"""Test embedding model integration."""
from langchain_core.embeddings import Embeddings
from langchain_voyageai import VoyageAIEmbeddings
MODEL = "voyage-2"
def test_initialization_voyage_2() -> None:
    """`voyage-2` passed with `api_key` gets the default batch size of 72."""
    embedder = VoyageAIEmbeddings(api_key="NOT_A_VALID_KEY", model=MODEL)  # type: ignore
    assert isinstance(embedder, Embeddings)
    assert embedder.batch_size == 72
    assert embedder.model == MODEL
    assert embedder._client is not None
def test_initialization_voyage_2_with_full_api_key_name() -> None:
    """The key may be passed as `voyage_api_key` instead of the `api_key` alias."""
    embedder = VoyageAIEmbeddings(voyage_api_key="NOT_A_VALID_KEY", model=MODEL)  # type: ignore
    assert isinstance(embedder, Embeddings)
    assert embedder.batch_size == 72
    assert embedder.model == MODEL
    assert embedder._client is not None
def test_initialization_voyage_1() -> None:
    """A model with no dedicated default falls back to a batch size of 7."""
    model_name = "voyage-01"
    embedder = VoyageAIEmbeddings(api_key="NOT_A_VALID_KEY", model=model_name)  # type: ignore
    assert isinstance(embedder, Embeddings)
    assert embedder.batch_size == 7
    assert embedder.model == "voyage-01"
    assert embedder._client is not None
def test_initialization_voyage_1_batch_size() -> None:
    """An explicit `batch_size` overrides the model-based default."""
    init_kwargs = {
        "api_key": "NOT_A_VALID_KEY",
        "model": "voyage-01",
        "batch_size": 15,
    }
    embedder = VoyageAIEmbeddings(**init_kwargs)  # type: ignore
    assert isinstance(embedder, Embeddings)
    assert embedder.batch_size == 15
    assert embedder.model == "voyage-01"
    assert embedder._client is not None
def test_initialization_with_output_dimension() -> None:
    """`output_dimension` is stored on the model as given."""
    init_kwargs = {
        "api_key": "NOT_A_VALID_KEY",
        "model": "voyage-3-large",
        "output_dimension": 256,
        "batch_size": 10,
    }
    embedder = VoyageAIEmbeddings(**init_kwargs)  # type: ignore
    assert isinstance(embedder, Embeddings)
    assert embedder.model == "voyage-3-large"
    assert embedder.output_dimension == 256

View File

@ -1,10 +0,0 @@
from langchain_voyageai import __all__
# Names the package is expected to export through `__all__`.
EXPECTED_ALL = ["VoyageAIEmbeddings", "VoyageAIRerank"]


def test_all_imports() -> None:
    """`__all__` holds exactly the expected names, in any order."""
    exported = sorted(__all__)
    assert sorted(EXPECTED_ALL) == exported

View File

@ -1,83 +0,0 @@
from collections import namedtuple
from typing import Any
import pytest # type: ignore
from langchain_core.documents import Document
from voyageai.api_resources import VoyageResponse # type: ignore
from voyageai.object import RerankingObject # type: ignore
from langchain_voyageai.rerank import VoyageAIRerank
# Sample texts shared by the tests in this module. Several entries are built
# from adjacent string literals, so each one is a single string.
doc_list = [
    "The Mediterranean diet emphasizes fish, olive oil, and vegetables"
    ", believed to reduce chronic diseases.",
    "Photosynthesis in plants converts light energy into glucose and "
    "produces essential oxygen.",
    "20th-century innovations, from radios to smartphones, centered "
    "on electronic advancements.",
    "Rivers provide water, irrigation, and habitat for aquatic species, "
    "vital for ecosystems.",
    "Apples conference call to discuss fourth fiscal quarter results and "
    "business updates is scheduled for Thursday, November 2, 2023 at 2:00 "
    "p.m. PT / 5:00 p.m. ET.",
    "Shakespeare's works, like 'Hamlet' and 'A Midsummer Night's Dream,' "
    "endure in literature.",
]
# The same texts wrapped as Document objects, in the same order.
documents = [Document(page_content=x) for x in doc_list]
@pytest.mark.requires("voyageai")
def test_init() -> None:
    """The reranker can be constructed from an API key and a model name."""
    init_kwargs = {"voyage_api_key": "foo", "model": "rerank-lite-1"}
    VoyageAIRerank(**init_kwargs)  # type: ignore[arg-type]
def get_mock_rerank_result() -> RerankingObject:
    """Build a RerankingObject that ranks document 1 (0.9) above document 0 (0.8)."""
    VoyageResultItem = namedtuple("VoyageResultItem", ["index", "relevance_score"])
    Usage = namedtuple("Usage", ["total_tokens"])

    ranked_items = [
        VoyageResultItem(index=1, relevance_score=0.9),
        VoyageResultItem(index=0, relevance_score=0.8),
    ]

    fake_response = VoyageResponse()
    fake_response.data = ranked_items
    fake_response.usage = Usage(total_tokens=255)

    return RerankingObject(response=fake_response, documents=doc_list)
@pytest.mark.requires("voyageai")
def test_rerank_unit_test(mocker: Any) -> None:
    """`compress_documents` returns documents in the mocked order with scores."""
    patched_rerank = mocker.patch("voyageai.Client.rerank")
    patched_rerank.return_value = get_mock_rerank_result()

    # The mock ranks document 1 first, then document 0.
    top_hit = Document(
        page_content="Photosynthesis in plants converts light energy into "
        "glucose and produces essential oxygen.",
        metadata={"relevance_score": 0.9},
    )
    second_hit = Document(
        page_content="The Mediterranean diet emphasizes fish, olive oil, and "
        "vegetables, believed to reduce chronic diseases.",
        metadata={"relevance_score": 0.8},
    )
    expected_result = [top_hit, second_hit]

    reranker = VoyageAIRerank(
        voyage_api_key="foo",  # type: ignore[arg-type]
        model="rerank-lite-1",
    )
    actual = reranker.compress_documents(
        documents=documents, query="When is the Apple's conference call scheduled?"
    )
    assert expected_result == actual
def test_rerank_empty_input() -> None:
    """Compressing an empty document list returns an empty result."""
    reranker = VoyageAIRerank(
        voyage_api_key="foo",  # type: ignore[arg-type]
        model="rerank-lite-1",
    )
    outcome = reranker.compress_documents(
        documents=[], query="When is the Apple's conference call scheduled?"
    )
    assert len(outcome) == 0

File diff suppressed because it is too large Load Diff