mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-20 13:54:48 +00:00
scheduled tests GHA (#8879)
Adds a scheduled daily GitHub Actions (GHA) workflow that runs the integration tests marked as `scheduled`. To start, only some tests in test_openai are marked.
This commit is contained in:
parent
8f0cd91d57
commit
95cf7de112
@ -1,5 +1,5 @@
|
||||
---
|
||||
name: libs/langchain-experimental CI
|
||||
name: libs/experimental CI
|
||||
|
||||
on:
|
||||
push:
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
name: libs/langchain-experimental Release
|
||||
name: libs/experimental Release
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
|
38
.github/workflows/scheduled_test.yml
vendored
Normal file
38
.github/workflows/scheduled_test.yml
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
# Daily workflow that runs the integration tests selected by
# `make scheduled_tests` against every Python version in the matrix.
name: Scheduled tests

on:
  # The event key must be `schedule`; `scheduled` is not a recognised
  # trigger, so the cron below would never fire.
  schedule:
    # GitHub evaluates cron expressions in UTC: every day at 13:00 UTC.
    - cron: '0 13 * * *'

env:
  POETRY_VERSION: "1.4.2"

jobs:
  build:
    runs-on: ubuntu-latest
    # Named deployment environment the job runs in.
    # NOTE(review): presumably this is where OPENAI_API_KEY is configured — confirm.
    environment: Scheduled testing
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    name: Python ${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: "1.4.2"
          # Install only the extras group needed by the scheduled tests.
          install-command: |
            echo "Running scheduled tests, installing dependencies with poetry..."
            poetry install -E scheduled_testing
      - name: Run tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        # NOTE(review): runs from the repository root; confirm the Makefile that
        # defines `scheduled_tests` is reachable from there.
        run: |
          make scheduled_tests
        shell: bash
        # `secrets: inherit` was dropped: it is only valid on a job that calls a
        # reusable workflow via `uses:`. The secret is already read through the
        # `secrets` context above.
|
@ -7,6 +7,10 @@ from unittest import mock
|
||||
|
||||
import pydantic
|
||||
import pytest
|
||||
from langchain import OpenAI
|
||||
from langchain.output_parsers import PydanticOutputParser
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
from langchain_experimental.cpal.base import (
|
||||
CausalChain,
|
||||
CPALChain,
|
||||
@ -35,10 +39,6 @@ from langchain_experimental.cpal.templates.univariate.narrative import (
|
||||
from langchain_experimental.cpal.templates.univariate.query import (
|
||||
template as query_template,
|
||||
)
|
||||
|
||||
from langchain import OpenAI
|
||||
from langchain.output_parsers import PydanticOutputParser
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
from tests.unit_tests.llms.fake_llm import FakeLLM
|
||||
|
||||
|
@ -1,12 +1,11 @@
|
||||
"""Test SQL Database Chain."""
|
||||
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert
|
||||
|
||||
from langchain.chains.sql_database.base import (
|
||||
from langchain.llms.openai import OpenAI
|
||||
from langchain.utilities.sql_database import SQLDatabase
|
||||
from libs.experimental.langchain_experimental.sql.base import (
|
||||
SQLDatabaseChain,
|
||||
SQLDatabaseSequentialChain,
|
||||
)
|
||||
from langchain.llms.openai import OpenAI
|
||||
from langchain.utilities.sql_database import SQLDatabase
|
||||
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert
|
||||
|
||||
metadata_obj = MetaData()
|
||||
|
@ -59,6 +59,9 @@ test_watch:
|
||||
integration_tests:
|
||||
poetry run pytest tests/integration_tests
|
||||
|
||||
scheduled_tests:
|
||||
poetry run pytest -m scheduled tests/integration_tests
|
||||
|
||||
docker_tests:
|
||||
docker build -t my-langchain-image:test .
|
||||
docker run --rm my-langchain-image:test
|
||||
|
588
libs/langchain/poetry.lock
generated
588
libs/langchain/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -375,6 +375,10 @@ extended_testing = [
|
||||
"xmltodict",
|
||||
]
|
||||
|
||||
scheduled_testing = [
|
||||
"openai",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
select = [
|
||||
"E", # pycodestyle
|
||||
@ -413,7 +417,8 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused
|
||||
# Registering custom markers.
|
||||
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
|
||||
markers = [
|
||||
"requires: mark tests as requiring a specific library"
|
||||
"requires: mark tests as requiring a specific library",
|
||||
"scheduled: mark tests to run in scheduled testing",
|
||||
]
|
||||
|
||||
[tool.codespell]
|
||||
|
@ -18,7 +18,7 @@ except ImportError:
|
||||
gptcache_installed = False
|
||||
|
||||
|
||||
def init_gptcache_map(cache_obj: Cache) -> None:
|
||||
def init_gptcache_map(cache_obj: Any) -> None:
|
||||
i = getattr(init_gptcache_map, "_i", 0)
|
||||
cache_path = f"data_map_{i}.txt"
|
||||
if os.path.isfile(cache_path):
|
||||
@ -30,7 +30,7 @@ def init_gptcache_map(cache_obj: Cache) -> None:
|
||||
init_gptcache_map._i = i + 1 # type: ignore
|
||||
|
||||
|
||||
def init_gptcache_map_with_llm(cache_obj: Cache, llm: str) -> None:
|
||||
def init_gptcache_map_with_llm(cache_obj: Any, llm: str) -> None:
|
||||
cache_path = f"data_map_{llm}.txt"
|
||||
if os.path.isfile(cache_path):
|
||||
os.remove(cache_path)
|
||||
|
@ -11,7 +11,6 @@ from datetime import timedelta
|
||||
from typing import Iterator
|
||||
|
||||
import pytest
|
||||
from momento import CacheClient, Configurations, CredentialProvider
|
||||
|
||||
import langchain
|
||||
from langchain.cache import MomentoCache
|
||||
@ -25,6 +24,8 @@ def random_string() -> str:
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def momento_cache() -> Iterator[MomentoCache]:
|
||||
from momento import CacheClient, Configurations, CredentialProvider
|
||||
|
||||
cache_name = f"langchain-test-cache-{random_string()}"
|
||||
client = CacheClient(
|
||||
Configurations.Laptop.v1(),
|
||||
@ -40,6 +41,8 @@ def momento_cache() -> Iterator[MomentoCache]:
|
||||
|
||||
|
||||
def test_invalid_ttl() -> None:
|
||||
from momento import CacheClient, Configurations, CredentialProvider
|
||||
|
||||
client = CacheClient(
|
||||
Configurations.Laptop.v1(),
|
||||
CredentialProvider.from_environment_variable("MOMENTO_AUTH_TOKEN"),
|
||||
|
@ -1,6 +1,5 @@
|
||||
"""Test Redis cache functionality."""
|
||||
import pytest
|
||||
import redis
|
||||
|
||||
import langchain
|
||||
from langchain.cache import RedisCache, RedisSemanticCache
|
||||
@ -13,6 +12,8 @@ REDIS_TEST_URL = "redis://localhost:6379"
|
||||
|
||||
|
||||
def test_redis_cache() -> None:
|
||||
import redis
|
||||
|
||||
langchain.llm_cache = RedisCache(redis_=redis.Redis.from_url(REDIS_TEST_URL))
|
||||
llm = FakeLLM()
|
||||
params = llm.dict()
|
||||
@ -31,6 +32,8 @@ def test_redis_cache() -> None:
|
||||
|
||||
|
||||
def test_redis_cache_chat() -> None:
|
||||
import redis
|
||||
|
||||
langchain.llm_cache = RedisCache(redis_=redis.Redis.from_url(REDIS_TEST_URL))
|
||||
llm = FakeChatModel()
|
||||
params = llm.dict()
|
||||
@ -69,6 +72,8 @@ def test_redis_semantic_cache() -> None:
|
||||
|
||||
|
||||
def test_redis_semantic_cache_chat() -> None:
|
||||
import redis
|
||||
|
||||
langchain.llm_cache = RedisCache(redis_=redis.Redis.from_url(REDIS_TEST_URL))
|
||||
llm = FakeChatModel()
|
||||
params = llm.dict()
|
||||
|
@ -2,8 +2,6 @@ import os
|
||||
from contextlib import ExitStack
|
||||
from pathlib import Path
|
||||
|
||||
from unstructured.cleaners.core import clean_extra_whitespace
|
||||
|
||||
from langchain.document_loaders import (
|
||||
UnstructuredAPIFileIOLoader,
|
||||
UnstructuredAPIFileLoader,
|
||||
@ -14,6 +12,8 @@ EXAMPLE_DOCS_DIRECTORY = str(Path(__file__).parent.parent / "examples/")
|
||||
|
||||
|
||||
def test_unstructured_loader_with_post_processor() -> None:
|
||||
from unstructured.cleaners.core import clean_extra_whitespace
|
||||
|
||||
file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper.pdf")
|
||||
loader = UnstructuredFileLoader(
|
||||
file_path=file_path,
|
||||
|
@ -1,15 +1,13 @@
|
||||
"""Test Baseten API wrapper."""
|
||||
import os
|
||||
|
||||
import baseten
|
||||
import pytest
|
||||
|
||||
from langchain.llms.baseten import Baseten
|
||||
|
||||
|
||||
@pytest.mark.requires(baseten)
|
||||
def test_baseten_call() -> None:
|
||||
"""Test valid call to Baseten."""
|
||||
import baseten
|
||||
|
||||
baseten.login(os.environ["BASETEN_API_KEY"])
|
||||
llm = Baseten(model=os.environ["BASETEN_MODEL_ID"])
|
||||
output = llm("Say foo:")
|
||||
|
@ -7,16 +7,17 @@ import pytest
|
||||
from langchain.callbacks.manager import CallbackManager
|
||||
from langchain.chat_models.openai import ChatOpenAI
|
||||
from langchain.llms.loading import load_llm
|
||||
from langchain.llms.openai import OpenAI, OpenAIChat
|
||||
from langchain.llms.openai import OpenAI
|
||||
from langchain.schema import LLMResult
|
||||
from tests.unit_tests.callbacks.fake_callback_handler import (
|
||||
FakeCallbackHandler,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
def test_openai_call() -> None:
|
||||
"""Test valid call to openai."""
|
||||
llm = OpenAI(max_tokens=10, n=3)
|
||||
llm = OpenAI()
|
||||
output = llm("Say something nice:")
|
||||
assert isinstance(output, str)
|
||||
|
||||
@ -86,6 +87,7 @@ def test_saving_loading_llm(tmp_path: Path) -> None:
|
||||
assert loaded_llm == llm
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
def test_openai_streaming() -> None:
|
||||
"""Test streaming tokens from OpenAI."""
|
||||
llm = OpenAI(max_tokens=10)
|
||||
@ -97,6 +99,7 @@ def test_openai_streaming() -> None:
|
||||
assert isinstance(token, str)
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_astream() -> None:
|
||||
"""Test streaming tokens from OpenAI."""
|
||||
@ -106,6 +109,7 @@ async def test_openai_astream() -> None:
|
||||
assert isinstance(token, str)
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_abatch() -> None:
|
||||
"""Test streaming tokens from OpenAI."""
|
||||
@ -128,6 +132,7 @@ async def test_openai_abatch_tags() -> None:
|
||||
assert isinstance(token, str)
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
def test_openai_batch() -> None:
|
||||
"""Test streaming tokens from OpenAI."""
|
||||
llm = OpenAI(max_tokens=10)
|
||||
@ -137,6 +142,7 @@ def test_openai_batch() -> None:
|
||||
assert isinstance(token, str)
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_ainvoke() -> None:
|
||||
"""Test streaming tokens from OpenAI."""
|
||||
@ -146,6 +152,7 @@ async def test_openai_ainvoke() -> None:
|
||||
assert isinstance(result, str)
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
def test_openai_invoke() -> None:
|
||||
"""Test streaming tokens from OpenAI."""
|
||||
llm = OpenAI(max_tokens=10)
|
||||
@ -154,6 +161,7 @@ def test_openai_invoke() -> None:
|
||||
assert isinstance(result, str)
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
def test_openai_multiple_prompts() -> None:
|
||||
"""Test completion with multiple prompts."""
|
||||
llm = OpenAI(max_tokens=10)
|
||||
@ -181,6 +189,7 @@ def test_openai_streaming_multiple_prompts_error() -> None:
|
||||
OpenAI(streaming=True).generate(["I'm Pickle Rick", "I'm Pickle Rick"])
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
def test_openai_streaming_call() -> None:
|
||||
"""Test valid call to openai."""
|
||||
llm = OpenAI(max_tokens=10, streaming=True)
|
||||
@ -203,6 +212,7 @@ def test_openai_streaming_callback() -> None:
|
||||
assert callback_handler.llm_streams == 10
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_async_generate() -> None:
|
||||
"""Test async generation."""
|
||||
@ -235,60 +245,6 @@ def test_openai_chat_wrong_class() -> None:
|
||||
assert isinstance(output, str)
|
||||
|
||||
|
||||
def test_openai_chat() -> None:
|
||||
"""Test OpenAIChat."""
|
||||
llm = OpenAIChat(max_tokens=10)
|
||||
output = llm("Say foo:")
|
||||
assert isinstance(output, str)
|
||||
|
||||
|
||||
def test_openai_chat_streaming() -> None:
|
||||
"""Test OpenAIChat with streaming option."""
|
||||
llm = OpenAIChat(max_tokens=10, streaming=True)
|
||||
output = llm("Say foo:")
|
||||
assert isinstance(output, str)
|
||||
|
||||
|
||||
def test_openai_chat_streaming_callback() -> None:
|
||||
"""Test that streaming correctly invokes on_llm_new_token callback."""
|
||||
callback_handler = FakeCallbackHandler()
|
||||
callback_manager = CallbackManager([callback_handler])
|
||||
llm = OpenAIChat(
|
||||
max_tokens=10,
|
||||
streaming=True,
|
||||
temperature=0,
|
||||
callback_manager=callback_manager,
|
||||
verbose=True,
|
||||
)
|
||||
llm("Write me a sentence with 100 words.")
|
||||
assert callback_handler.llm_streams != 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_chat_async_generate() -> None:
|
||||
"""Test async chat."""
|
||||
llm = OpenAIChat(max_tokens=10)
|
||||
output = await llm.agenerate(["Hello, how are you?"])
|
||||
assert isinstance(output, LLMResult)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_chat_async_streaming_callback() -> None:
|
||||
"""Test that streaming correctly invokes on_llm_new_token callback."""
|
||||
callback_handler = FakeCallbackHandler()
|
||||
callback_manager = CallbackManager([callback_handler])
|
||||
llm = OpenAIChat(
|
||||
max_tokens=10,
|
||||
streaming=True,
|
||||
temperature=0,
|
||||
callback_manager=callback_manager,
|
||||
verbose=True,
|
||||
)
|
||||
result = await llm.agenerate(["Write me a sentence with 100 words."])
|
||||
assert callback_handler.llm_streams != 0
|
||||
assert isinstance(result, LLMResult)
|
||||
|
||||
|
||||
def test_openai_modelname_to_contextsize_valid() -> None:
|
||||
"""Test model name to context size on a valid model."""
|
||||
assert OpenAI().modelname_to_contextsize("davinci") == 2049
|
||||
|
@ -6,8 +6,8 @@ from langchain.memory.chat_message_histories import CosmosDBChatMessageHistory
|
||||
from langchain.schema.messages import _message_to_dict
|
||||
|
||||
# Replace these with your Azure Cosmos DB endpoint and key
|
||||
endpoint = os.environ["COSMOS_DB_ENDPOINT"]
|
||||
credential = os.environ["COSMOS_DB_KEY"]
|
||||
endpoint = os.environ.get("COSMOS_DB_ENDPOINT", "")
|
||||
credential = os.environ.get("COSMOS_DB_KEY", "")
|
||||
|
||||
|
||||
def test_memory_with_message_store() -> None:
|
||||
|
@ -10,7 +10,6 @@ from datetime import timedelta
|
||||
from typing import Iterator
|
||||
|
||||
import pytest
|
||||
from momento import CacheClient, Configurations, CredentialProvider
|
||||
|
||||
from langchain.memory import ConversationBufferMemory
|
||||
from langchain.memory.chat_message_histories import MomentoChatMessageHistory
|
||||
@ -23,6 +22,8 @@ def random_string() -> str:
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def message_history() -> Iterator[MomentoChatMessageHistory]:
|
||||
from momento import CacheClient, Configurations, CredentialProvider
|
||||
|
||||
cache_name = f"langchain-test-cache-{random_string()}"
|
||||
client = CacheClient(
|
||||
Configurations.Laptop.v1(),
|
||||
|
@ -6,7 +6,7 @@ from langchain.memory.chat_message_histories import MongoDBChatMessageHistory
|
||||
from langchain.schema.messages import _message_to_dict
|
||||
|
||||
# Replace these with your mongodb connection string
|
||||
connection_string = os.environ["MONGODB_CONNECTION_STRING"]
|
||||
connection_string = os.environ.get("MONGODB_CONNECTION_STRING", "")
|
||||
|
||||
|
||||
def test_memory_with_message_store() -> None:
|
||||
|
@ -1,43 +1,30 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Generator, Tuple
|
||||
from typing import TYPE_CHECKING, Any, Dict, Generator, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from docarray import BaseDoc
|
||||
from docarray.index import (
|
||||
ElasticDocIndex,
|
||||
HnswDocumentIndex,
|
||||
InMemoryExactNNIndex,
|
||||
QdrantDocumentIndex,
|
||||
WeaviateDocumentIndex,
|
||||
)
|
||||
from docarray.typing import NdArray
|
||||
from pydantic import Field
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docarray.index import (
|
||||
ElasticDocIndex,
|
||||
HnswDocumentIndex,
|
||||
InMemoryExactNNIndex,
|
||||
QdrantDocumentIndex,
|
||||
WeaviateDocumentIndex,
|
||||
)
|
||||
from docarray.typing import NdArray
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.embeddings import FakeEmbeddings
|
||||
|
||||
|
||||
class MyDoc(BaseDoc):
|
||||
title: str
|
||||
title_embedding: NdArray[32] # type: ignore
|
||||
other_emb: NdArray[32] # type: ignore
|
||||
year: int
|
||||
|
||||
|
||||
class WeaviateDoc(BaseDoc):
|
||||
# When initializing the Weaviate index, denote the field
|
||||
# you want to search on with `is_embedding=True`
|
||||
title: str
|
||||
title_embedding: NdArray[32] = Field(is_embedding=True) # type: ignore
|
||||
other_emb: NdArray[32] # type: ignore
|
||||
year: int
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def init_weaviate() -> (
|
||||
Generator[
|
||||
Tuple[WeaviateDocumentIndex[WeaviateDoc], Dict[str, Any], FakeEmbeddings],
|
||||
Tuple[WeaviateDocumentIndex, Dict[str, Any], FakeEmbeddings],
|
||||
None,
|
||||
None,
|
||||
]
|
||||
@ -46,6 +33,19 @@ def init_weaviate() -> (
|
||||
cd tests/integration_tests/vectorstores/docker-compose
|
||||
docker compose -f weaviate.yml up
|
||||
"""
|
||||
from docarray import BaseDoc
|
||||
from docarray.index import (
|
||||
WeaviateDocumentIndex,
|
||||
)
|
||||
|
||||
class WeaviateDoc(BaseDoc):
|
||||
# When initializing the Weaviate index, denote the field
|
||||
# you want to search on with `is_embedding=True`
|
||||
title: str
|
||||
title_embedding: NdArray[32] = Field(is_embedding=True) # type: ignore
|
||||
other_emb: NdArray[32] # type: ignore
|
||||
year: int
|
||||
|
||||
embeddings = FakeEmbeddings(size=32)
|
||||
|
||||
# initialize WeaviateDocumentIndex
|
||||
@ -76,12 +76,23 @@ def init_weaviate() -> (
|
||||
|
||||
@pytest.fixture
|
||||
def init_elastic() -> (
|
||||
Generator[Tuple[ElasticDocIndex[MyDoc], Dict[str, Any], FakeEmbeddings], None, None]
|
||||
Generator[Tuple[ElasticDocIndex, Dict[str, Any], FakeEmbeddings], None, None]
|
||||
):
|
||||
"""
|
||||
cd tests/integration_tests/vectorstores/docker-compose
|
||||
docker-compose -f elasticsearch.yml up
|
||||
"""
|
||||
from docarray import BaseDoc
|
||||
from docarray.index import (
|
||||
ElasticDocIndex,
|
||||
)
|
||||
|
||||
class MyDoc(BaseDoc):
|
||||
title: str
|
||||
title_embedding: NdArray[32] # type: ignore
|
||||
other_emb: NdArray[32] # type: ignore
|
||||
year: int
|
||||
|
||||
embeddings = FakeEmbeddings(size=32)
|
||||
|
||||
# initialize ElasticDocIndex
|
||||
@ -109,7 +120,16 @@ def init_elastic() -> (
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def init_qdrant() -> Tuple[QdrantDocumentIndex[MyDoc], rest.Filter, FakeEmbeddings]:
|
||||
def init_qdrant() -> Tuple[QdrantDocumentIndex, rest.Filter, FakeEmbeddings]:
|
||||
from docarray import BaseDoc
|
||||
from docarray.index import QdrantDocumentIndex
|
||||
|
||||
class MyDoc(BaseDoc):
|
||||
title: str
|
||||
title_embedding: NdArray[32] # type: ignore
|
||||
other_emb: NdArray[32] # type: ignore
|
||||
year: int
|
||||
|
||||
embeddings = FakeEmbeddings(size=32)
|
||||
|
||||
# initialize QdrantDocumentIndex
|
||||
@ -144,9 +164,16 @@ def init_qdrant() -> Tuple[QdrantDocumentIndex[MyDoc], rest.Filter, FakeEmbeddin
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def init_in_memory() -> (
|
||||
Tuple[InMemoryExactNNIndex[MyDoc], Dict[str, Any], FakeEmbeddings]
|
||||
):
|
||||
def init_in_memory() -> Tuple[InMemoryExactNNIndex, Dict[str, Any], FakeEmbeddings]:
|
||||
from docarray import BaseDoc
|
||||
from docarray.index import InMemoryExactNNIndex
|
||||
|
||||
class MyDoc(BaseDoc):
|
||||
title: str
|
||||
title_embedding: NdArray[32] # type: ignore
|
||||
other_emb: NdArray[32] # type: ignore
|
||||
year: int
|
||||
|
||||
embeddings = FakeEmbeddings(size=32)
|
||||
|
||||
# initialize InMemoryExactNNIndex
|
||||
@ -172,7 +199,18 @@ def init_in_memory() -> (
|
||||
@pytest.fixture
|
||||
def init_hnsw(
|
||||
tmp_path: Path,
|
||||
) -> Tuple[HnswDocumentIndex[MyDoc], Dict[str, Any], FakeEmbeddings]:
|
||||
) -> Tuple[HnswDocumentIndex, Dict[str, Any], FakeEmbeddings]:
|
||||
from docarray import BaseDoc
|
||||
from docarray.index import (
|
||||
HnswDocumentIndex,
|
||||
)
|
||||
|
||||
class MyDoc(BaseDoc):
|
||||
title: str
|
||||
title_embedding: NdArray[32] # type: ignore
|
||||
other_emb: NdArray[32] # type: ignore
|
||||
year: int
|
||||
|
||||
embeddings = FakeEmbeddings(size=32)
|
||||
|
||||
# initialize HnswDocumentIndex
|
||||
|
@ -1,7 +1,6 @@
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from vcr.request import Request
|
||||
|
||||
from langchain.retrievers import DocArrayRetriever
|
||||
from tests.integration_tests.retrievers.docarray.fixtures import ( # noqa: F401
|
||||
@ -17,7 +16,7 @@ from tests.integration_tests.retrievers.docarray.fixtures import ( # noqa: F401
|
||||
"backend",
|
||||
["init_hnsw", "init_in_memory", "init_qdrant", "init_elastic", "init_weaviate"],
|
||||
)
|
||||
def test_backends(request: Request, backend: Any) -> None:
|
||||
def test_backends(request: Any, backend: Any) -> None:
|
||||
index, filter_query, embeddings = request.getfixturevalue(backend)
|
||||
|
||||
# create a retriever
|
||||
|
@ -6,7 +6,6 @@ from typing import Generator, Union
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
from weaviate import Client
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever
|
||||
@ -28,6 +27,8 @@ class TestWeaviateHybridSearchRetriever:
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def weaviate_url(self) -> Union[str, Generator[str, None, None]]:
|
||||
"""Return the weaviate url."""
|
||||
from weaviate import Client
|
||||
|
||||
url = "http://localhost:8080"
|
||||
yield url
|
||||
|
||||
@ -38,6 +39,8 @@ class TestWeaviateHybridSearchRetriever:
|
||||
@pytest.mark.vcr(ignore_localhost=True)
|
||||
def test_get_relevant_documents(self, weaviate_url: str) -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
from weaviate import Client
|
||||
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
|
||||
@ -64,6 +67,8 @@ class TestWeaviateHybridSearchRetriever:
|
||||
@pytest.mark.vcr(ignore_localhost=True)
|
||||
def test_get_relevant_documents_with_score(self, weaviate_url: str) -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
from weaviate import Client
|
||||
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
|
||||
@ -87,6 +92,8 @@ class TestWeaviateHybridSearchRetriever:
|
||||
@pytest.mark.vcr(ignore_localhost=True)
|
||||
def test_get_relevant_documents_with_filter(self, weaviate_url: str) -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
from weaviate import Client
|
||||
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
|
||||
@ -113,6 +120,8 @@ class TestWeaviateHybridSearchRetriever:
|
||||
@pytest.mark.vcr(ignore_localhost=True)
|
||||
def test_get_relevant_documents_with_uuids(self, weaviate_url: str) -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
from weaviate import Client
|
||||
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
# Weaviate replaces the object if the UUID already exists
|
||||
|
@ -2,7 +2,6 @@ import uuid
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
@ -69,6 +68,7 @@ async def test_qdrant_aadd_texts_stores_ids(
|
||||
) -> None:
|
||||
"""Test end to end Qdrant.aadd_texts stores provided ids."""
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
ids = [
|
||||
"fa38d572-4c31-4579-aedc-1960d79df6df",
|
||||
@ -101,6 +101,7 @@ async def test_qdrant_aadd_texts_stores_embeddings_as_named_vectors(
|
||||
) -> None:
|
||||
"""Test end to end Qdrant.aadd_texts stores named vectors if name is provided."""
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
collection_name = uuid.uuid4().hex
|
||||
|
||||
|
@ -2,7 +2,6 @@ from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
@ -239,6 +238,8 @@ async def test_qdrant_similarity_search_filters_with_qdrant_filters(
|
||||
qdrant_location: str,
|
||||
) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [
|
||||
{"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}
|
||||
|
@ -2,7 +2,6 @@ import uuid
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
@ -81,6 +80,7 @@ def test_qdrant_add_texts_stores_duplicated_texts(vector_name: Optional[str]) ->
|
||||
def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
|
||||
"""Test end to end Qdrant.add_texts stores provided ids."""
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
ids = [
|
||||
"fa38d572-4c31-4579-aedc-1960d79df6df",
|
||||
@ -107,6 +107,7 @@ def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
|
||||
def test_qdrant_add_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
|
||||
"""Test end to end Qdrant.add_texts stores named vectors if name is provided."""
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
collection_name = uuid.uuid4().hex
|
||||
|
||||
|
@ -2,7 +2,6 @@ from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.vectorstores import Qdrant
|
||||
@ -209,6 +208,8 @@ def test_qdrant_similarity_search_filters_with_qdrant_filters(
|
||||
vector_name: Optional[str],
|
||||
) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [
|
||||
{"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}
|
||||
|
@ -1,7 +1,6 @@
|
||||
import os
|
||||
import time
|
||||
|
||||
import openai
|
||||
import pytest
|
||||
from dotenv import load_dotenv
|
||||
|
||||
@ -10,13 +9,7 @@ from langchain.vectorstores.azuresearch import AzureSearch
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Azure OpenAI settings
|
||||
openai.api_type = "azure"
|
||||
openai.api_base = os.getenv("OPENAI_API_BASE", "")
|
||||
openai.api_version = "2023-05-15"
|
||||
openai.api_key = os.getenv("OPENAI_API_KEY", "")
|
||||
model: str = os.getenv("OPENAI_EMBEDDINGS_ENGINE_DOC", "text-embedding-ada-002")
|
||||
|
||||
model = os.getenv("OPENAI_EMBEDDINGS_ENGINE_DOC", "text-embedding-ada-002")
|
||||
# Vector store settings
|
||||
vector_store_address: str = os.getenv("AZURE_SEARCH_ENDPOINT", "")
|
||||
vector_store_password: str = os.getenv("AZURE_SEARCH_ADMIN_KEY", "")
|
||||
|
@ -1,5 +1,4 @@
|
||||
"""Test Deep Lake functionality."""
|
||||
import deeplake
|
||||
import pytest
|
||||
from pytest import FixtureRequest
|
||||
|
||||
@ -53,6 +52,8 @@ def test_deeplake_with_metadatas() -> None:
|
||||
|
||||
def test_deeplakewith_persistence() -> None:
|
||||
"""Test end to end construction and search, with persistence."""
|
||||
import deeplake
|
||||
|
||||
dataset_path = "./tests/persist_dir"
|
||||
if deeplake.exists(dataset_path):
|
||||
deeplake.delete(dataset_path)
|
||||
@ -84,6 +85,8 @@ def test_deeplakewith_persistence() -> None:
|
||||
|
||||
def test_deeplake_overwrite_flag() -> None:
|
||||
"""Test overwrite behavior"""
|
||||
import deeplake
|
||||
|
||||
dataset_path = "./tests/persist_dir"
|
||||
if deeplake.exists(dataset_path):
|
||||
deeplake.delete(dataset_path)
|
||||
@ -234,6 +237,8 @@ def test_delete_dataset_by_filter(deeplake_datastore: DeepLake) -> None:
|
||||
|
||||
def test_delete_by_path(deeplake_datastore: DeepLake) -> None:
|
||||
"""Test delete dataset."""
|
||||
import deeplake
|
||||
|
||||
path = deeplake_datastore.dataset_path
|
||||
DeepLake.force_delete_by_path(path)
|
||||
assert not deeplake.exists(path)
|
||||
|
@ -5,7 +5,6 @@ import uuid
|
||||
from typing import Generator, List, Union
|
||||
|
||||
import pytest
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
@ -29,6 +28,8 @@ class TestElasticsearch:
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def elasticsearch_url(self) -> Union[str, Generator[str, None, None]]:
|
||||
"""Return the elasticsearch url."""
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
url = "http://localhost:9200"
|
||||
yield url
|
||||
es = Elasticsearch(hosts=url)
|
||||
@ -108,6 +109,7 @@ class TestElasticsearch:
|
||||
) -> None:
|
||||
"""This test checks the construction of a custom
|
||||
ElasticSearch index using the 'from_documents'."""
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
index_name = f"custom_index_{uuid.uuid4().hex}"
|
||||
elastic_vector_search = ElasticVectorSearch.from_documents(
|
||||
@ -134,6 +136,7 @@ class TestElasticsearch:
|
||||
) -> None:
|
||||
"""This test checks the construction of a custom
|
||||
ElasticSearch index using the 'add_documents'."""
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
index_name = f"custom_index_{uuid.uuid4().hex}"
|
||||
elastic_vector_search = ElasticVectorSearch(
|
||||
|
@ -1,10 +1,10 @@
|
||||
import lancedb
|
||||
|
||||
from langchain.vectorstores import LanceDB
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
||||
|
||||
|
||||
def test_lancedb() -> None:
|
||||
import lancedb
|
||||
|
||||
embeddings = FakeEmbeddings()
|
||||
db = lancedb.connect("/tmp/lancedb")
|
||||
texts = ["text 1", "text 2", "item 3"]
|
||||
@ -24,6 +24,8 @@ def test_lancedb() -> None:
|
||||
|
||||
|
||||
def test_lancedb_add_texts() -> None:
|
||||
import lancedb
|
||||
|
||||
embeddings = FakeEmbeddings()
|
||||
db = lancedb.connect("/tmp/lancedb")
|
||||
texts = ["text 1"]
|
||||
|
@ -1,7 +1,6 @@
|
||||
"""Test Marqo functionality."""
|
||||
from typing import Dict
|
||||
|
||||
import marqo
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
@ -14,6 +13,8 @@ INDEX_NAME = "langchain-integration-tests"
|
||||
|
||||
@pytest.fixture
|
||||
def client() -> Marqo:
|
||||
import marqo
|
||||
|
||||
# fixture for marqo client to be used throughout testing, resets the index
|
||||
client = marqo.Client(url=DEFAULT_MARQO_URL, api_key=DEFAULT_MARQO_API_KEY)
|
||||
try:
|
||||
@ -128,6 +129,8 @@ def test_marqo_weighted_query(client: Marqo) -> None:
|
||||
|
||||
|
||||
def test_marqo_multimodal() -> None:
|
||||
import marqo
|
||||
|
||||
client = marqo.Client(url=DEFAULT_MARQO_URL, api_key=DEFAULT_MARQO_API_KEY)
|
||||
try:
|
||||
client.index(INDEX_NAME).delete()
|
||||
|
@ -1,7 +1,6 @@
|
||||
"""Test Meilisearch functionality."""
|
||||
from typing import Generator
|
||||
from typing import TYPE_CHECKING, Generator
|
||||
|
||||
import meilisearch
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
@ -9,6 +8,9 @@ from langchain.docstore.document import Document
|
||||
from langchain.vectorstores import Meilisearch
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import meilisearch
|
||||
|
||||
INDEX_NAME = "test-langchain-demo"
|
||||
TEST_MEILI_HTTP_ADDR = "http://localhost:7700"
|
||||
TEST_MEILI_MASTER_KEY = "masterKey"
|
||||
@ -49,7 +51,9 @@ class TestMeilisearchVectorSearch:
|
||||
task = client.index(index.uid).delete()
|
||||
client.wait_for_task(task.task_uid)
|
||||
|
||||
def client(self) -> meilisearch.Client:
|
||||
def client(self) -> "meilisearch.Client":
|
||||
import meilisearch
|
||||
|
||||
return meilisearch.Client(TEST_MEILI_HTTP_ADDR, TEST_MEILI_MASTER_KEY)
|
||||
|
||||
def _wait_last_task(self) -> None:
|
||||
|
@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
from time import sleep
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import pytest
|
||||
|
||||
@ -21,23 +21,27 @@ DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
|
||||
|
||||
# Instantiate as constant instead of pytest fixture to prevent needing to make multiple
|
||||
# connections.
|
||||
TEST_CLIENT: MongoClient = MongoClient(CONNECTION_STRING)
|
||||
collection = TEST_CLIENT[DB_NAME][COLLECTION_NAME]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def collection() -> Any:
|
||||
test_client = MongoClient(CONNECTION_STRING)
|
||||
return test_client[DB_NAME][COLLECTION_NAME]
|
||||
|
||||
|
||||
class TestMongoDBAtlasVectorSearch:
|
||||
@classmethod
|
||||
def setup_class(cls) -> None:
|
||||
def setup_class(cls, collection: Any) -> None:
|
||||
# ensure the test collection is empty
|
||||
assert collection.count_documents({}) == 0 # type: ignore[index] # noqa: E501
|
||||
|
||||
@classmethod
|
||||
def teardown_class(cls) -> None:
|
||||
def teardown_class(cls, collection: Any) -> None:
|
||||
# delete all the documents in the collection
|
||||
collection.delete_many({}) # type: ignore[index]
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(self) -> None:
|
||||
def setup(self, collection: Any) -> None:
|
||||
# delete all the documents in the collection
|
||||
collection.delete_many({}) # type: ignore[index]
|
||||
|
||||
|
@ -1,8 +1,6 @@
|
||||
"""Test OpenSearch functionality."""
|
||||
|
||||
import boto3
|
||||
import pytest
|
||||
from opensearchpy import AWSV4SignerAuth
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.vectorstores.opensearch_vector_search import (
|
||||
@ -219,6 +217,9 @@ def test_opensearch_with_custom_field_name_appx_false() -> None:
|
||||
|
||||
def test_opensearch_serverless_with_scripting_search_indexing_throws_error() -> None:
|
||||
"""Test to validate indexing using Serverless without Approximate Search."""
|
||||
import boto3
|
||||
from opensearchpy import AWSV4SignerAuth
|
||||
|
||||
region = "test-region"
|
||||
service = "aoss"
|
||||
credentials = boto3.Session().get_credentials()
|
||||
@ -235,6 +236,9 @@ def test_opensearch_serverless_with_scripting_search_indexing_throws_error() ->
|
||||
|
||||
def test_opensearch_serverless_with_lucene_engine_throws_error() -> None:
|
||||
"""Test to validate indexing using lucene engine with Serverless."""
|
||||
import boto3
|
||||
from opensearchpy import AWSV4SignerAuth
|
||||
|
||||
region = "test-region"
|
||||
service = "aoss"
|
||||
credentials = boto3.Session().get_credentials()
|
||||
|
@ -2,16 +2,18 @@ import importlib
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from typing import List
|
||||
from typing import TYPE_CHECKING, List
|
||||
|
||||
import numpy as np
|
||||
import pinecone
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.vectorstores.pinecone import Pinecone
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pinecone
|
||||
|
||||
index_name = "langchain-test-index" # name of the index
|
||||
namespace_name = "langchain-test-namespace" # name of the namespace
|
||||
dimension = 1536 # dimension of the embeddings
|
||||
@ -32,10 +34,12 @@ def reset_pinecone() -> None:
|
||||
|
||||
|
||||
class TestPinecone:
|
||||
index: pinecone.Index
|
||||
index: "pinecone.Index"
|
||||
|
||||
@classmethod
|
||||
def setup_class(cls) -> None:
|
||||
import pinecone
|
||||
|
||||
reset_pinecone()
|
||||
|
||||
cls.index = pinecone.Index(index_name)
|
||||
|
@ -1,9 +1,6 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
import rockset
|
||||
import rockset.models
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.vectorstores.rocksetdb import Rockset
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
@ -44,6 +41,9 @@ class TestRockset:
|
||||
|
||||
@classmethod
|
||||
def setup_class(cls) -> None:
|
||||
import rockset
|
||||
import rockset.models
|
||||
|
||||
assert os.environ.get("ROCKSET_API_KEY") is not None
|
||||
assert os.environ.get("ROCKSET_REGION") is not None
|
||||
|
||||
|
@ -5,7 +5,6 @@ import uuid
|
||||
from typing import Generator, Union
|
||||
|
||||
import pytest
|
||||
from weaviate import Client
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
@ -29,6 +28,8 @@ class TestWeaviate:
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def weaviate_url(self) -> Union[str, Generator[str, None, None]]:
|
||||
"""Return the weaviate url."""
|
||||
from weaviate import Client
|
||||
|
||||
url = "http://localhost:8080"
|
||||
yield url
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user