Mirror of https://github.com/hwchase17/langchain.git
Synced 2025-06-23 15:19:33 +00:00
voyageai: remove from monorepo (#31281)
langchain-voyageai is now maintained at https://github.com/voyage-ai/langchain-voyageai.
This commit is contained in:
parent 49fbcec34f
commit bf645c83f4
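For downstream users this is only a change of development home, assuming the package continues to be published to PyPI under the same name from the new repository. A minimal usage sketch (the model names are illustrative choices, and `VOYAGE_API_KEY` is assumed to be set in the environment):

```python
# pip install -U langchain-voyageai   # now developed at voyage-ai/langchain-voyageai
from langchain_voyageai import VoyageAIEmbeddings, VoyageAIRerank

# Reads the API key from VOYAGE_API_KEY (or pass api_key=... explicitly).
embeddings = VoyageAIEmbeddings(model="voyage-3")
vector = embeddings.embed_query("hello world")

reranker = VoyageAIRerank(model="rerank-lite-1", top_k=3)
```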
.github/scripts/check_diff.py
@@ -37,7 +37,6 @@ IGNORED_PARTNERS = [
 ]
 
 PY_312_MAX_PACKAGES = [
-    "libs/partners/voyageai",
     "libs/partners/chroma", # https://github.com/chroma-core/chroma/issues/4382
 ]
.github/workflows/_integration_test.yml
@@ -67,7 +67,6 @@ jobs:
 ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
 ES_API_KEY: ${{ secrets.ES_API_KEY }}
 MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
-VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
 COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
 UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
 XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
.github/workflows/_release.yml
@@ -322,7 +322,6 @@ jobs:
 ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
 ES_API_KEY: ${{ secrets.ES_API_KEY }}
 MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
-VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
 UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
 FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
 XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
@@ -103,12 +103,6 @@ repos:
   entry: make -C libs/partners/qdrant format
   files: ^libs/partners/qdrant/
   pass_filenames: false
-- id: voyageai
-  name: format partners/voyageai
-  language: system
-  entry: make -C libs/partners/voyageai format
-  files: ^libs/partners/voyageai/
-  pass_filenames: false
 - id: root
   name: format docs, cookbook
   language: system
@@ -152,8 +152,8 @@ packages:
   downloads: 20558
   downloads_updated_at: '2025-05-08T20:26:05.985970+00:00'
 - name: langchain-voyageai
-  path: libs/partners/voyageai
-  repo: langchain-ai/langchain
+  path: libs/voyageai
+  repo: voyage-ai/langchain-voyageai
   downloads: 27698
   downloads_updated_at: '2025-05-08T20:26:05.985970+00:00'
 - name: langchain-aws
libs/partners/voyageai/.gitignore
@@ -1 +0,0 @@
__pycache__
@@ -1,21 +0,0 @@
MIT License

Copyright (c) 2024 LangChain, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,65 +0,0 @@
.PHONY: all format lint test tests integration_tests docker_tests help extended_tests

# Default target executed when no arguments are given to make.
all: help

.EXPORT_ALL_VARIABLES:
UV_FROZEN = true

# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/
integration_test integration_tests: TEST_FILE=tests/integration_tests/

test tests:
	uv run --group test pytest --disable-socket --allow-unix-socket $(TEST_FILE)

integration_test integration_tests:
	uv run --group test --group test_integration pytest $(TEST_FILE)

test_watch:
	uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE)


######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/voyageai --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_voyageai
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

lint lint_diff lint_package lint_tests:
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && uv run --all-groups mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run --group lint ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run --group lint ruff --select I --fix $(PYTHON_FILES)

spell_check:
	uv run --all-groups codespell --toml pyproject.toml

spell_fix:
	uv run --all-groups codespell --toml pyproject.toml -w

check_imports: $(shell find langchain_voyageai -name '*.py')
	uv run --all-groups python ./scripts/check_imports.py $^

######################
# HELP
######################

help:
	@echo '----'
	@echo 'check_imports - check imports'
	@echo 'format - run code formatters'
	@echo 'lint - run linters'
	@echo 'test - run unit tests'
	@echo 'tests - run unit tests'
	@echo 'test TEST_FILE=<test_file> - run all tests in file'
@@ -1,21 +1,3 @@
 # langchain-voyageai
+This package has moved!
-
-This package contains the LangChain integrations for VoyageAI through their `voyageai` client package.
-
-## Installation and Setup
-
-- Install the LangChain partner package
-```bash
-pip install langchain-voyageai
-```
-- Get an VoyageAI api key and set it as an environment variable (`VOYAGE_API_KEY`) or use the API key as a parameter in the Client.
-
-
-
-## Text Embedding Model
-
-See a [usage example](https://python.langchain.com/docs/integrations/text_embedding/voyageai)
-
-```python
-from langchain_voyageai import VoyageAIEmbeddings
-```
+https://github.com/voyage-ai/langchain-voyageai
@@ -1,4 +0,0 @@
from langchain_voyageai.embeddings import VoyageAIEmbeddings
from langchain_voyageai.rerank import VoyageAIRerank

__all__ = ["VoyageAIEmbeddings", "VoyageAIRerank"]
@@ -1,156 +0,0 @@
import logging
from collections.abc import Iterable
from typing import Any, Literal, Optional, cast

import voyageai  # type: ignore
from langchain_core.embeddings import Embeddings
from langchain_core.utils import secret_from_env
from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    PrivateAttr,
    SecretStr,
    model_validator,
)
from typing_extensions import Self

logger = logging.getLogger(__name__)

DEFAULT_VOYAGE_2_BATCH_SIZE = 72
DEFAULT_VOYAGE_3_LITE_BATCH_SIZE = 30
DEFAULT_VOYAGE_3_BATCH_SIZE = 10
DEFAULT_BATCH_SIZE = 7


class VoyageAIEmbeddings(BaseModel, Embeddings):
    """VoyageAIEmbeddings embedding model.

    Example:
        .. code-block:: python

            from langchain_voyageai import VoyageAIEmbeddings

            model = VoyageAIEmbeddings()
    """

    _client: voyageai.Client = PrivateAttr()
    _aclient: voyageai.client_async.AsyncClient = PrivateAttr()
    model: str
    batch_size: int

    output_dimension: Optional[Literal[256, 512, 1024, 2048]] = None
    show_progress_bar: bool = False
    truncation: bool = True
    voyage_api_key: SecretStr = Field(
        alias="api_key",
        default_factory=secret_from_env(
            "VOYAGE_API_KEY",
            error_message="Must set `VOYAGE_API_KEY` environment variable or "
            "pass `api_key` to VoyageAIEmbeddings constructor.",
        ),
    )

    model_config = ConfigDict(
        extra="forbid",
        populate_by_name=True,
    )

    @model_validator(mode="before")
    @classmethod
    def default_values(cls, values: dict) -> Any:
        """Set default batch size based on model"""
        model = values.get("model")
        batch_size = values.get("batch_size")
        if batch_size is None:
            values["batch_size"] = (
                DEFAULT_VOYAGE_2_BATCH_SIZE
                if model in ["voyage-2", "voyage-02"]
                else (
                    DEFAULT_VOYAGE_3_LITE_BATCH_SIZE
                    if model == "voyage-3-lite"
                    else (
                        DEFAULT_VOYAGE_3_BATCH_SIZE
                        if model == "voyage-3"
                        else DEFAULT_BATCH_SIZE
                    )
                )
            )
        return values

    @model_validator(mode="after")
    def validate_environment(self) -> Self:
        """Validate that VoyageAI credentials exist in environment."""
        api_key_str = self.voyage_api_key.get_secret_value()
        self._client = voyageai.Client(api_key=api_key_str)
        self._aclient = voyageai.client_async.AsyncClient(api_key=api_key_str)
        return self

    def _get_batch_iterator(self, texts: list[str]) -> Iterable:
        if self.show_progress_bar:
            try:
                from tqdm.auto import tqdm  # type: ignore
            except ImportError as e:
                raise ImportError(
                    "Must have tqdm installed if `show_progress_bar` is set to True. "
                    "Please install with `pip install tqdm`."
                ) from e

            _iter = tqdm(range(0, len(texts), self.batch_size))
        else:
            _iter = range(0, len(texts), self.batch_size)  # type: ignore

        return _iter

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed search docs."""
        embeddings: list[list[float]] = []

        _iter = self._get_batch_iterator(texts)
        for i in _iter:
            r = self._client.embed(
                texts[i : i + self.batch_size],
                model=self.model,
                input_type="document",
                truncation=self.truncation,
                output_dimension=self.output_dimension,
            ).embeddings
            embeddings.extend(cast(Iterable[list[float]], r))
        return embeddings

    def embed_query(self, text: str) -> list[float]:
        """Embed query text."""
        r = self._client.embed(
            [text],
            model=self.model,
            input_type="query",
            truncation=self.truncation,
            output_dimension=self.output_dimension,
        ).embeddings[0]
        return cast(list[float], r)

    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
        embeddings: list[list[float]] = []

        _iter = self._get_batch_iterator(texts)
        for i in _iter:
            r = await self._aclient.embed(
                texts[i : i + self.batch_size],
                model=self.model,
                input_type="document",
                truncation=self.truncation,
                output_dimension=self.output_dimension,
            )
            embeddings.extend(cast(Iterable[list[float]], r.embeddings))

        return embeddings

    async def aembed_query(self, text: str) -> list[float]:
        r = await self._aclient.embed(
            [text],
            model=self.model,
            input_type="query",
            truncation=self.truncation,
            output_dimension=self.output_dimension,
        )
        return cast(list[float], r.embeddings[0])
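For reference, a minimal usage sketch of the embeddings class removed above; the model name is illustrative and `VOYAGE_API_KEY` is assumed to be set in the environment:

```python
from langchain_voyageai import VoyageAIEmbeddings

# batch_size defaults are chosen per model by the "before" validator
# (e.g. 72 for voyage-2, 10 for voyage-3); it can also be set explicitly.
emb = VoyageAIEmbeddings(model="voyage-3", batch_size=10)

vectors = emb.embed_documents(["first document", "second document"])
query_vector = emb.embed_query("a search query")
print(len(vectors), len(query_vector))
```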
@@ -1,156 +0,0 @@
from __future__ import annotations

import os
from collections.abc import Sequence
from copy import deepcopy
from typing import Any, Optional, Union

import voyageai  # type: ignore
from langchain_core.callbacks.manager import Callbacks
from langchain_core.documents import Document
from langchain_core.documents.compressor import BaseDocumentCompressor
from langchain_core.utils import convert_to_secret_str
from pydantic import ConfigDict, SecretStr, model_validator
from voyageai.object import RerankingObject  # type: ignore


class VoyageAIRerank(BaseDocumentCompressor):
    """Document compressor that uses `VoyageAI Rerank API`."""

    client: voyageai.Client = None  # type: ignore
    aclient: voyageai.AsyncClient = None  # type: ignore
    """VoyageAI clients to use for compressing documents."""
    voyage_api_key: Optional[SecretStr] = None
    """VoyageAI API key. Must be specified directly or via environment variable
    VOYAGE_API_KEY."""
    model: str
    """Model to use for reranking."""
    top_k: Optional[int] = None
    """Number of documents to return."""
    truncation: bool = True

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
    )

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: dict) -> Any:
        """Validate that api key exists in environment."""
        voyage_api_key = values.get("voyage_api_key") or os.getenv(
            "VOYAGE_API_KEY", None
        )
        if voyage_api_key:
            api_key_secretstr = convert_to_secret_str(voyage_api_key)
            values["voyage_api_key"] = api_key_secretstr

            api_key_str = api_key_secretstr.get_secret_value()
        else:
            api_key_str = None

        values["client"] = voyageai.Client(api_key=api_key_str)
        values["aclient"] = voyageai.AsyncClient(api_key=api_key_str)

        return values

    def _rerank(
        self,
        documents: Sequence[Union[str, Document]],
        query: str,
    ) -> RerankingObject:
        """Returns an ordered list of documents ordered by their relevance
        to the provided query.

        Args:
            query: The query to use for reranking.
            documents: A sequence of documents to rerank.
        """
        docs = [
            doc.page_content if isinstance(doc, Document) else doc for doc in documents
        ]
        return self.client.rerank(
            query=query,
            documents=docs,
            model=self.model,
            top_k=self.top_k,
            truncation=self.truncation,
        )

    async def _arerank(
        self,
        documents: Sequence[Union[str, Document]],
        query: str,
    ) -> RerankingObject:
        """Returns an ordered list of documents ordered by their relevance
        to the provided query.

        Args:
            query: The query to use for reranking.
            documents: A sequence of documents to rerank.
        """
        docs = [
            doc.page_content if isinstance(doc, Document) else doc for doc in documents
        ]
        return await self.aclient.rerank(
            query=query,
            documents=docs,
            model=self.model,
            top_k=self.top_k,
            truncation=self.truncation,
        )

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """
        Compress documents using VoyageAI's rerank API.

        Args:
            documents: A sequence of documents to compress.
            query: The query to use for compressing the documents.
            callbacks: Callbacks to run during the compression process.

        Returns:
            A sequence of compressed documents in relevance_score order.
        """
        if len(documents) == 0:
            return []

        compressed = []
        for res in self._rerank(documents, query).results:
            doc = documents[res.index]
            doc_copy = Document(doc.page_content, metadata=deepcopy(doc.metadata))
            doc_copy.metadata["relevance_score"] = res.relevance_score
            compressed.append(doc_copy)
        return compressed

    async def acompress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """
        Compress documents using VoyageAI's rerank API.

        Args:
            documents: A sequence of documents to compress.
            query: The query to use for compressing the documents.
            callbacks: Callbacks to run during the compression process.

        Returns:
            A sequence of compressed documents in relevance_score order.
        """
        if len(documents) == 0:
            return []

        compressed = []
        for res in (await self._arerank(documents, query)).results:
            doc = documents[res.index]
            doc_copy = Document(doc.page_content, metadata=deepcopy(doc.metadata))
            doc_copy.metadata["relevance_score"] = res.relevance_score
            compressed.append(doc_copy)
        return compressed
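Likewise, a minimal usage sketch of the reranker removed above (model name and documents are illustrative; `VOYAGE_API_KEY` is assumed to be set):

```python
from langchain_core.documents import Document
from langchain_voyageai import VoyageAIRerank

reranker = VoyageAIRerank(model="rerank-lite-1", top_k=2)

docs = [
    Document(page_content="Photosynthesis converts light energy into glucose."),
    Document(page_content="Apple's conference call is scheduled for November 2, 2023."),
]
# Documents come back reordered by relevance, with `relevance_score` in metadata.
ranked = reranker.compress_documents(
    documents=docs, query="When is Apple's conference call?"
)
for doc in ranked:
    print(doc.metadata["relevance_score"], doc.page_content)
```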
@@ -1,66 +0,0 @@
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"

[tool.pdm.build]
includes = []

[project]
authors = []
license = { text = "MIT" }
requires-python = "<3.13,>=3.9"
dependencies = [
    "langchain-core<1.0.0,>=0.3.29",
    "voyageai<1,>=0.3.2",
    "pydantic<3,>=2",
]
name = "langchain-voyageai"
version = "0.1.4"
description = "An integration package connecting VoyageAI and LangChain"
readme = "README.md"

[project.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/voyageai"
"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-voyageai%3D%3D0%22&expanded=true"
repository = "https://github.com/langchain-ai/langchain"

[dependency-groups]
test = [
    "pytest<8.0.0,>=7.3.0",
    "freezegun<2.0.0,>=1.2.2",
    "pytest-mock<4.0.0,>=3.10.0",
    "syrupy<5.0.0,>=4.0.2",
    "pytest-watcher<1.0.0,>=0.3.4",
    "pytest-asyncio<1.0.0,>=0.21.1",
    "pytest-socket<1.0.0,>=0.7.0",
    "langchain-core",
]
codespell = ["codespell<3.0.0,>=2.2.0"]
test_integration = []
lint = ["ruff<1.0.0,>=0.1.5"]
dev = ["langchain-core"]
typing = ["mypy<2.0,>=1.10", "langchain-core"]

[tool.uv.sources]
langchain-core = { path = "../../core", editable = true }

[tool.mypy]
disallow_untyped_defs = "True"

[tool.ruff]
target-version = "py39"

[tool.ruff.lint]
select = ["E", "F", "I", "UP"]
ignore = [ "UP007", ]

[tool.coverage.run]
omit = ["tests/*"]

[tool.pytest.ini_options]
addopts = "--strict-markers --strict-config --durations=5"
markers = [
    "requires: mark tests as requiring a specific library",
    "compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"
@@ -1,17 +0,0 @@
import sys
import traceback
from importlib.machinery import SourceFileLoader

if __name__ == "__main__":
    files = sys.argv[1:]
    has_failure = False
    for file in files:
        try:
            SourceFileLoader("x", file).load_module()
        except Exception:
            has_failure = True
            print(file)
            traceback.print_exc()
            print()

    sys.exit(1 if has_failure else 0)
@@ -1,17 +0,0 @@
#!/bin/bash

set -eu

# Initialize a variable to keep track of errors
errors=0

# make sure not importing from langchain or langchain_experimental
git --no-pager grep '^from langchain\.' . && errors=$((errors+1))
git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))

# Decide on an exit status based on the errors
if [ "$errors" -gt 0 ]; then
    exit 1
else
    exit 0
fi
@@ -1,7 +0,0 @@
import pytest


@pytest.mark.compile
def test_placeholder() -> None:
    """Used for compiling integration tests without running any real tests."""
    pass
@@ -1,62 +0,0 @@
"""Test VoyageAI embeddings."""

from langchain_voyageai import VoyageAIEmbeddings

# Please set VOYAGE_API_KEY in the environment variables
MODEL = "voyage-2"


def test_langchain_voyageai_embedding_documents() -> None:
    """Test voyage embeddings."""
    documents = ["foo bar"]
    embedding = VoyageAIEmbeddings(model=MODEL)  # type: ignore[call-arg]
    output = embedding.embed_documents(documents)
    assert len(output) == 1
    assert len(output[0]) == 1024


def test_langchain_voyageai_embedding_documents_multiple() -> None:
    """Test voyage embeddings."""
    documents = ["foo bar", "bar foo", "foo"]
    embedding = VoyageAIEmbeddings(model=MODEL, batch_size=2)
    output = embedding.embed_documents(documents)
    assert len(output) == 3
    assert len(output[0]) == 1024
    assert len(output[1]) == 1024
    assert len(output[2]) == 1024


def test_langchain_voyageai_embedding_query() -> None:
    """Test voyage embeddings."""
    document = "foo bar"
    embedding = VoyageAIEmbeddings(model=MODEL)  # type: ignore[call-arg]
    output = embedding.embed_query(document)
    assert len(output) == 1024


async def test_langchain_voyageai_async_embedding_documents_multiple() -> None:
    """Test voyage embeddings."""
    documents = ["foo bar", "bar foo", "foo"]
    embedding = VoyageAIEmbeddings(model=MODEL, batch_size=2)
    output = await embedding.aembed_documents(documents)
    assert len(output) == 3
    assert len(output[0]) == 1024
    assert len(output[1]) == 1024
    assert len(output[2]) == 1024


async def test_langchain_voyageai_async_embedding_query() -> None:
    """Test voyage embeddings."""
    document = "foo bar"
    embedding = VoyageAIEmbeddings(model=MODEL)  # type: ignore[call-arg]
    output = await embedding.aembed_query(document)
    assert len(output) == 1024


def test_langchain_voyageai_embedding_documents_with_output_dimension() -> None:
    """Test voyage embeddings."""
    documents = ["foo bar"]
    embedding = VoyageAIEmbeddings(model="voyage-3-large", output_dimension=256)  # type: ignore[call-arg]
    output = embedding.embed_documents(documents)
    assert len(output) == 1
    assert len(output[0]) == 256
@@ -1,68 +0,0 @@
"""Test the voyageai reranker."""

import os

from langchain_core.documents import Document

from langchain_voyageai.rerank import VoyageAIRerank


def test_voyageai_reranker_init() -> None:
    """Test the voyageai reranker initializes correctly."""
    VoyageAIRerank(voyage_api_key="foo", model="foo")  # type: ignore[arg-type]


def test_sync() -> None:
    rerank = VoyageAIRerank(
        voyage_api_key=os.environ["VOYAGE_API_KEY"],  # type: ignore[arg-type]
        model="rerank-lite-1",
    )
    doc_list = [
        "The Mediterranean diet emphasizes fish, olive oil, and vegetables"
        ", believed to reduce chronic diseases.",
        "Photosynthesis in plants converts light energy into glucose and "
        "produces essential oxygen.",
        "20th-century innovations, from radios to smartphones, centered "
        "on electronic advancements.",
        "Rivers provide water, irrigation, and habitat for aquatic species, "
        "vital for ecosystems.",
        "Apple’s conference call to discuss fourth fiscal quarter results and "
        "business updates is scheduled for Thursday, November 2, 2023 at 2:00 "
        "p.m. PT / 5:00 p.m. ET.",
        "Shakespeare's works, like 'Hamlet' and 'A Midsummer Night's Dream,' "
        "endure in literature.",
    ]
    documents = [Document(page_content=x) for x in doc_list]

    result = rerank.compress_documents(
        query="When is the Apple's conference call scheduled?", documents=documents
    )
    assert len(doc_list) == len(result)


async def test_async() -> None:
    rerank = VoyageAIRerank(
        voyage_api_key=os.environ["VOYAGE_API_KEY"],  # type: ignore[arg-type]
        model="rerank-lite-1",
    )
    doc_list = [
        "The Mediterranean diet emphasizes fish, olive oil, and vegetables"
        ", believed to reduce chronic diseases.",
        "Photosynthesis in plants converts light energy into glucose and "
        "produces essential oxygen.",
        "20th-century innovations, from radios to smartphones, centered "
        "on electronic advancements.",
        "Rivers provide water, irrigation, and habitat for aquatic species, "
        "vital for ecosystems.",
        "Apple’s conference call to discuss fourth fiscal quarter results and "
        "business updates is scheduled for Thursday, November 2, 2023 at 2:00 "
        "p.m. PT / 5:00 p.m. ET.",
        "Shakespeare's works, like 'Hamlet' and 'A Midsummer Night's Dream,' "
        "endure in literature.",
    ]
    documents = [Document(page_content=x) for x in doc_list]

    result = await rerank.acompress_documents(
        query="When is the Apple's conference call scheduled?", documents=documents
    )
    assert len(doc_list) == len(result)
@@ -1,61 +0,0 @@
"""Test embedding model integration."""

from langchain_core.embeddings import Embeddings

from langchain_voyageai import VoyageAIEmbeddings

MODEL = "voyage-2"


def test_initialization_voyage_2() -> None:
    """Test embedding model initialization."""
    emb = VoyageAIEmbeddings(api_key="NOT_A_VALID_KEY", model=MODEL)  # type: ignore
    assert isinstance(emb, Embeddings)
    assert emb.batch_size == 72
    assert emb.model == MODEL
    assert emb._client is not None


def test_initialization_voyage_2_with_full_api_key_name() -> None:
    """Test embedding model initialization."""
    # Testing that we can initialize the model using `voyage_api_key`
    # instead of `api_key`
    emb = VoyageAIEmbeddings(voyage_api_key="NOT_A_VALID_KEY", model=MODEL)  # type: ignore
    assert isinstance(emb, Embeddings)
    assert emb.batch_size == 72
    assert emb.model == MODEL
    assert emb._client is not None


def test_initialization_voyage_1() -> None:
    """Test embedding model initialization."""
    emb = VoyageAIEmbeddings(api_key="NOT_A_VALID_KEY", model="voyage-01")  # type: ignore
    assert isinstance(emb, Embeddings)
    assert emb.batch_size == 7
    assert emb.model == "voyage-01"
    assert emb._client is not None


def test_initialization_voyage_1_batch_size() -> None:
    """Test embedding model initialization."""
    emb = VoyageAIEmbeddings(
        api_key="NOT_A_VALID_KEY",  # type: ignore
        model="voyage-01",
        batch_size=15,
    )
    assert isinstance(emb, Embeddings)
    assert emb.batch_size == 15
    assert emb.model == "voyage-01"
    assert emb._client is not None


def test_initialization_with_output_dimension() -> None:
    emb = VoyageAIEmbeddings(
        api_key="NOT_A_VALID_KEY",  # type: ignore
        model="voyage-3-large",
        output_dimension=256,
        batch_size=10,
    )
    assert isinstance(emb, Embeddings)
    assert emb.model == "voyage-3-large"
    assert emb.output_dimension == 256
@@ -1,10 +0,0 @@
from langchain_voyageai import __all__

EXPECTED_ALL = [
    "VoyageAIEmbeddings",
    "VoyageAIRerank",
]


def test_all_imports() -> None:
    assert sorted(EXPECTED_ALL) == sorted(__all__)
@@ -1,83 +0,0 @@
from collections import namedtuple
from typing import Any

import pytest  # type: ignore
from langchain_core.documents import Document
from voyageai.api_resources import VoyageResponse  # type: ignore
from voyageai.object import RerankingObject  # type: ignore

from langchain_voyageai.rerank import VoyageAIRerank

doc_list = [
    "The Mediterranean diet emphasizes fish, olive oil, and vegetables"
    ", believed to reduce chronic diseases.",
    "Photosynthesis in plants converts light energy into glucose and "
    "produces essential oxygen.",
    "20th-century innovations, from radios to smartphones, centered "
    "on electronic advancements.",
    "Rivers provide water, irrigation, and habitat for aquatic species, "
    "vital for ecosystems.",
    "Apple’s conference call to discuss fourth fiscal quarter results and "
    "business updates is scheduled for Thursday, November 2, 2023 at 2:00 "
    "p.m. PT / 5:00 p.m. ET.",
    "Shakespeare's works, like 'Hamlet' and 'A Midsummer Night's Dream,' "
    "endure in literature.",
]
documents = [Document(page_content=x) for x in doc_list]


@pytest.mark.requires("voyageai")
def test_init() -> None:
    VoyageAIRerank(
        voyage_api_key="foo",  # type: ignore[arg-type]
        model="rerank-lite-1",
    )


def get_mock_rerank_result() -> RerankingObject:
    VoyageResultItem = namedtuple("VoyageResultItem", ["index", "relevance_score"])
    Usage = namedtuple("Usage", ["total_tokens"])
    voyage_response = VoyageResponse()
    voyage_response.data = [
        VoyageResultItem(index=1, relevance_score=0.9),
        VoyageResultItem(index=0, relevance_score=0.8),
    ]
    voyage_response.usage = Usage(total_tokens=255)
    return RerankingObject(response=voyage_response, documents=doc_list)


@pytest.mark.requires("voyageai")
def test_rerank_unit_test(mocker: Any) -> None:
    mocker.patch("voyageai.Client.rerank").return_value = get_mock_rerank_result()
    expected_result = [
        Document(
            page_content="Photosynthesis in plants converts light energy into "
            "glucose and produces essential oxygen.",
            metadata={"relevance_score": 0.9},
        ),
        Document(
            page_content="The Mediterranean diet emphasizes fish, olive oil, and "
            "vegetables, believed to reduce chronic diseases.",
            metadata={"relevance_score": 0.8},
        ),
    ]

    rerank = VoyageAIRerank(
        voyage_api_key="foo",  # type: ignore[arg-type]
        model="rerank-lite-1",
    )
    result = rerank.compress_documents(
        documents=documents, query="When is the Apple's conference call scheduled?"
    )
    assert expected_result == result


def test_rerank_empty_input() -> None:
    rerank = VoyageAIRerank(
        voyage_api_key="foo",  # type: ignore[arg-type]
        model="rerank-lite-1",
    )
    result = rerank.compress_documents(
        documents=[], query="When is the Apple's conference call scheduled?"
    )
    assert len(result) == 0
File diff suppressed because it is too large