scheduled tests GHA (#8879)

Add a scheduled daily GitHub Actions workflow that runs marked integration tests. To start, only some tests in test_openai are marked.
Bagatur 2023-08-08 14:55:25 -07:00 committed by GitHub
parent 8f0cd91d57
commit 95cf7de112
36 changed files with 804 additions and 173 deletions
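
The mechanics are simple: a test carries a pytest marker, and the scheduled workflow selects on that marker. A minimal sketch of a marked test, assuming the scheduled marker is registered in pyproject.toml (the test body here is illustrative):

import pytest


@pytest.mark.scheduled
def test_openai_call() -> None:
    # The marker only tags the test; a plain pytest run still collects it.
    # `pytest -m scheduled` selects exactly the tagged tests.
    assert True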

View File

@ -1,5 +1,5 @@
---
name: libs/langchain-experimental CI
name: libs/experimental CI
on:
push:

View File

@ -1,5 +1,5 @@
---
name: libs/langchain-experimental Release
name: libs/experimental Release
on:
pull_request:

.github/workflows/scheduled_test.yml (new file, 38 lines)
View File

@ -0,0 +1,38 @@
name: Scheduled tests

on:
  schedule:
    - cron: '0 13 * * *'

env:
  POETRY_VERSION: "1.4.2"

jobs:
  build:
    runs-on: ubuntu-latest
    environment: Scheduled testing
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    name: Python ${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: "1.4.2"
          install-command: |
            echo "Running scheduled tests, installing dependencies with poetry..."
            poetry install -E scheduled_testing
      - name: Run tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          make scheduled_tests
        shell: bash
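
The cron expression '0 13 * * *' fires once a day at 13:00 UTC, and the matrix fans each run out across Python 3.8 through 3.11.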

View File

@ -7,6 +7,10 @@ from unittest import mock
import pydantic
import pytest
from langchain import OpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.prompt import PromptTemplate
from langchain_experimental.cpal.base import (
CausalChain,
CPALChain,
@ -35,10 +39,6 @@ from langchain_experimental.cpal.templates.univariate.narrative import (
from langchain_experimental.cpal.templates.univariate.query import (
template as query_template,
)
from langchain import OpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.prompt import PromptTemplate
from tests.unit_tests.llms.fake_llm import FakeLLM

View File

@ -1,12 +1,11 @@
"""Test SQL Database Chain."""
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert
from langchain.chains.sql_database.base import (
from langchain.llms.openai import OpenAI
from langchain.utilities.sql_database import SQLDatabase
from libs.experimental.langchain_experimental.sql.base import (
SQLDatabaseChain,
SQLDatabaseSequentialChain,
)
from langchain.llms.openai import OpenAI
from langchain.utilities.sql_database import SQLDatabase
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert
metadata_obj = MetaData()

View File

@ -59,6 +59,9 @@ test_watch:
integration_tests:
poetry run pytest tests/integration_tests
scheduled_tests:
poetry run pytest -m scheduled tests/integration_tests
docker_tests:
docker build -t my-langchain-image:test .
docker run --rm my-langchain-image:test
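
The -m scheduled flag restricts the run to tests tagged with that marker. For local debugging the same subset can be invoked through pytest's Python entry point; a sketch, assuming it is run from the package root:

import pytest

# Equivalent to `make scheduled_tests`: run only the tests
# decorated with @pytest.mark.scheduled.
raise SystemExit(pytest.main(["-m", "scheduled", "tests/integration_tests"]))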

File diff suppressed because it is too large.

View File

@ -375,6 +375,10 @@ extended_testing = [
"xmltodict",
]
scheduled_testing = [
"openai",
]
[tool.ruff]
select = [
"E", # pycodestyle
@ -413,7 +417,8 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused
# Registering custom markers.
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
markers = [
"requires: mark tests as requiring a specific library"
"requires: mark tests as requiring a specific library",
"scheduled: mark tests to run in scheduled testing",
]
[tool.codespell]
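
Two things happen in this file: the scheduled_testing extra keeps the daily job's install minimal (only openai is pulled in beyond the core dependencies), and the scheduled marker is registered so that --strict-markers accepts it. Without the registration, collection fails outright; a sketch of the failure mode, with an illustrative test:

import pytest


# Under --strict-markers, applying a marker that is not registered in
# pyproject.toml (e.g. the misspelled "sceduled" below) fails collection
# instead of silently creating a new, unknown marker.
@pytest.mark.sceduled
def test_example() -> None:
    assert True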

View File

@ -18,7 +18,7 @@ except ImportError:
gptcache_installed = False
def init_gptcache_map(cache_obj: Cache) -> None:
def init_gptcache_map(cache_obj: Any) -> None:
i = getattr(init_gptcache_map, "_i", 0)
cache_path = f"data_map_{i}.txt"
if os.path.isfile(cache_path):
@ -30,7 +30,7 @@ def init_gptcache_map(cache_obj: Cache) -> None:
init_gptcache_map._i = i + 1 # type: ignore
def init_gptcache_map_with_llm(cache_obj: Cache, llm: str) -> None:
def init_gptcache_map_with_llm(cache_obj: Any, llm: str) -> None:
cache_path = f"data_map_{llm}.txt"
if os.path.isfile(cache_path):
os.remove(cache_path)

View File

@ -11,7 +11,6 @@ from datetime import timedelta
from typing import Iterator
import pytest
from momento import CacheClient, Configurations, CredentialProvider
import langchain
from langchain.cache import MomentoCache
@ -25,6 +24,8 @@ def random_string() -> str:
@pytest.fixture(scope="module")
def momento_cache() -> Iterator[MomentoCache]:
from momento import CacheClient, Configurations, CredentialProvider
cache_name = f"langchain-test-cache-{random_string()}"
client = CacheClient(
Configurations.Laptop.v1(),
@ -40,6 +41,8 @@ def momento_cache() -> Iterator[MomentoCache]:
def test_invalid_ttl() -> None:
from momento import CacheClient, Configurations, CredentialProvider
client = CacheClient(
Configurations.Laptop.v1(),
CredentialProvider.from_environment_variable("MOMENTO_AUTH_TOKEN"),
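
This deferred-import shape repeats across most files in the commit: optional third-party imports move from module scope into the fixture or test body, so the file can be collected even when the dependency is not installed. A minimal sketch with a placeholder package name:

def test_uses_optional_dependency() -> None:
    # Importing here, not at module top level, means a missing optional
    # dependency breaks only this test at run time, not collection of
    # the whole file.
    from somepackage import Client  # hypothetical optional dependency

    client = Client()
    assert client is not None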

View File

@ -1,6 +1,5 @@
"""Test Redis cache functionality."""
import pytest
import redis
import langchain
from langchain.cache import RedisCache, RedisSemanticCache
@ -13,6 +12,8 @@ REDIS_TEST_URL = "redis://localhost:6379"
def test_redis_cache() -> None:
import redis
langchain.llm_cache = RedisCache(redis_=redis.Redis.from_url(REDIS_TEST_URL))
llm = FakeLLM()
params = llm.dict()
@ -31,6 +32,8 @@ def test_redis_cache() -> None:
def test_redis_cache_chat() -> None:
import redis
langchain.llm_cache = RedisCache(redis_=redis.Redis.from_url(REDIS_TEST_URL))
llm = FakeChatModel()
params = llm.dict()
@ -69,6 +72,8 @@ def test_redis_semantic_cache() -> None:
def test_redis_semantic_cache_chat() -> None:
import redis
langchain.llm_cache = RedisCache(redis_=redis.Redis.from_url(REDIS_TEST_URL))
llm = FakeChatModel()
params = llm.dict()

View File

@ -2,8 +2,6 @@ import os
from contextlib import ExitStack
from pathlib import Path
from unstructured.cleaners.core import clean_extra_whitespace
from langchain.document_loaders import (
UnstructuredAPIFileIOLoader,
UnstructuredAPIFileLoader,
@ -14,6 +12,8 @@ EXAMPLE_DOCS_DIRECTORY = str(Path(__file__).parent.parent / "examples/")
def test_unstructured_loader_with_post_processor() -> None:
from unstructured.cleaners.core import clean_extra_whitespace
file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper.pdf")
loader = UnstructuredFileLoader(
file_path=file_path,

View File

@ -1,15 +1,13 @@
"""Test Baseten API wrapper."""
import os
import baseten
import pytest
from langchain.llms.baseten import Baseten
@pytest.mark.requires("baseten")
def test_baseten_call() -> None:
"""Test valid call to Baseten."""
import baseten
baseten.login(os.environ["BASETEN_API_KEY"])
llm = Baseten(model=os.environ["BASETEN_MODEL_ID"])
output = llm("Say foo:")

View File

@ -7,16 +7,17 @@ import pytest
from langchain.callbacks.manager import CallbackManager
from langchain.chat_models.openai import ChatOpenAI
from langchain.llms.loading import load_llm
from langchain.llms.openai import OpenAI, OpenAIChat
from langchain.llms.openai import OpenAI
from langchain.schema import LLMResult
from tests.unit_tests.callbacks.fake_callback_handler import (
FakeCallbackHandler,
)
@pytest.mark.scheduled
def test_openai_call() -> None:
"""Test valid call to openai."""
llm = OpenAI(max_tokens=10, n=3)
llm = OpenAI()
output = llm("Say something nice:")
assert isinstance(output, str)
@ -86,6 +87,7 @@ def test_saving_loading_llm(tmp_path: Path) -> None:
assert loaded_llm == llm
@pytest.mark.scheduled
def test_openai_streaming() -> None:
"""Test streaming tokens from OpenAI."""
llm = OpenAI(max_tokens=10)
@ -97,6 +99,7 @@ def test_openai_streaming() -> None:
assert isinstance(token, str)
@pytest.mark.scheduled
@pytest.mark.asyncio
async def test_openai_astream() -> None:
"""Test streaming tokens from OpenAI."""
@ -106,6 +109,7 @@ async def test_openai_astream() -> None:
assert isinstance(token, str)
@pytest.mark.scheduled
@pytest.mark.asyncio
async def test_openai_abatch() -> None:
"""Test streaming tokens from OpenAI."""
@ -128,6 +132,7 @@ async def test_openai_abatch_tags() -> None:
assert isinstance(token, str)
@pytest.mark.scheduled
def test_openai_batch() -> None:
"""Test streaming tokens from OpenAI."""
llm = OpenAI(max_tokens=10)
@ -137,6 +142,7 @@ def test_openai_batch() -> None:
assert isinstance(token, str)
@pytest.mark.scheduled
@pytest.mark.asyncio
async def test_openai_ainvoke() -> None:
"""Test streaming tokens from OpenAI."""
@ -146,6 +152,7 @@ async def test_openai_ainvoke() -> None:
assert isinstance(result, str)
@pytest.mark.scheduled
def test_openai_invoke() -> None:
"""Test streaming tokens from OpenAI."""
llm = OpenAI(max_tokens=10)
@ -154,6 +161,7 @@ def test_openai_invoke() -> None:
assert isinstance(result, str)
@pytest.mark.scheduled
def test_openai_multiple_prompts() -> None:
"""Test completion with multiple prompts."""
llm = OpenAI(max_tokens=10)
@ -181,6 +189,7 @@ def test_openai_streaming_multiple_prompts_error() -> None:
OpenAI(streaming=True).generate(["I'm Pickle Rick", "I'm Pickle Rick"])
@pytest.mark.scheduled
def test_openai_streaming_call() -> None:
"""Test valid call to openai."""
llm = OpenAI(max_tokens=10, streaming=True)
@ -203,6 +212,7 @@ def test_openai_streaming_callback() -> None:
assert callback_handler.llm_streams == 10
@pytest.mark.scheduled
@pytest.mark.asyncio
async def test_openai_async_generate() -> None:
"""Test async generation."""
@ -235,60 +245,6 @@ def test_openai_chat_wrong_class() -> None:
assert isinstance(output, str)
def test_openai_chat() -> None:
"""Test OpenAIChat."""
llm = OpenAIChat(max_tokens=10)
output = llm("Say foo:")
assert isinstance(output, str)
def test_openai_chat_streaming() -> None:
"""Test OpenAIChat with streaming option."""
llm = OpenAIChat(max_tokens=10, streaming=True)
output = llm("Say foo:")
assert isinstance(output, str)
def test_openai_chat_streaming_callback() -> None:
"""Test that streaming correctly invokes on_llm_new_token callback."""
callback_handler = FakeCallbackHandler()
callback_manager = CallbackManager([callback_handler])
llm = OpenAIChat(
max_tokens=10,
streaming=True,
temperature=0,
callback_manager=callback_manager,
verbose=True,
)
llm("Write me a sentence with 100 words.")
assert callback_handler.llm_streams != 0
@pytest.mark.asyncio
async def test_openai_chat_async_generate() -> None:
"""Test async chat."""
llm = OpenAIChat(max_tokens=10)
output = await llm.agenerate(["Hello, how are you?"])
assert isinstance(output, LLMResult)
@pytest.mark.asyncio
async def test_openai_chat_async_streaming_callback() -> None:
"""Test that streaming correctly invokes on_llm_new_token callback."""
callback_handler = FakeCallbackHandler()
callback_manager = CallbackManager([callback_handler])
llm = OpenAIChat(
max_tokens=10,
streaming=True,
temperature=0,
callback_manager=callback_manager,
verbose=True,
)
result = await llm.agenerate(["Write me a sentence with 100 words."])
assert callback_handler.llm_streams != 0
assert isinstance(result, LLMResult)
def test_openai_modelname_to_contextsize_valid() -> None:
"""Test model name to context size on a valid model."""
assert OpenAI().modelname_to_contextsize("davinci") == 2049

View File

@ -6,8 +6,8 @@ from langchain.memory.chat_message_histories import CosmosDBChatMessageHistory
from langchain.schema.messages import _message_to_dict
# Replace these with your Azure Cosmos DB endpoint and key
endpoint = os.environ["COSMOS_DB_ENDPOINT"]
credential = os.environ["COSMOS_DB_KEY"]
endpoint = os.environ.get("COSMOS_DB_ENDPOINT", "")
credential = os.environ.get("COSMOS_DB_KEY", "")
def test_memory_with_message_store() -> None:
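
Swapping os.environ[...] for os.environ.get(..., "") means the module imports cleanly on machines without the credentials, so collection no longer crashes; the tests themselves still need real values to pass.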

View File

@ -10,7 +10,6 @@ from datetime import timedelta
from typing import Iterator
import pytest
from momento import CacheClient, Configurations, CredentialProvider
from langchain.memory import ConversationBufferMemory
from langchain.memory.chat_message_histories import MomentoChatMessageHistory
@ -23,6 +22,8 @@ def random_string() -> str:
@pytest.fixture(scope="function")
def message_history() -> Iterator[MomentoChatMessageHistory]:
from momento import CacheClient, Configurations, CredentialProvider
cache_name = f"langchain-test-cache-{random_string()}"
client = CacheClient(
Configurations.Laptop.v1(),

View File

@ -6,7 +6,7 @@ from langchain.memory.chat_message_histories import MongoDBChatMessageHistory
from langchain.schema.messages import _message_to_dict
# Replace these with your mongodb connection string
connection_string = os.environ["MONGODB_CONNECTION_STRING"]
connection_string = os.environ.get("MONGODB_CONNECTION_STRING", "")
def test_memory_with_message_store() -> None:

View File

@ -1,43 +1,30 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Generator, Tuple
from typing import TYPE_CHECKING, Any, Dict, Generator, Tuple
import numpy as np
import pytest
from docarray import BaseDoc
from docarray.index import (
ElasticDocIndex,
HnswDocumentIndex,
InMemoryExactNNIndex,
QdrantDocumentIndex,
WeaviateDocumentIndex,
)
from docarray.typing import NdArray
from pydantic import Field
from qdrant_client.http import models as rest
if TYPE_CHECKING:
from docarray.index import (
ElasticDocIndex,
HnswDocumentIndex,
InMemoryExactNNIndex,
QdrantDocumentIndex,
WeaviateDocumentIndex,
)
from docarray.typing import NdArray
from qdrant_client.http import models as rest
from langchain.embeddings import FakeEmbeddings
class MyDoc(BaseDoc):
title: str
title_embedding: NdArray[32] # type: ignore
other_emb: NdArray[32] # type: ignore
year: int
class WeaviateDoc(BaseDoc):
# When initializing the Weaviate index, denote the field
# you want to search on with `is_embedding=True`
title: str
title_embedding: NdArray[32] = Field(is_embedding=True) # type: ignore
other_emb: NdArray[32] # type: ignore
year: int
@pytest.fixture
def init_weaviate() -> (
Generator[
Tuple[WeaviateDocumentIndex[WeaviateDoc], Dict[str, Any], FakeEmbeddings],
Tuple[WeaviateDocumentIndex, Dict[str, Any], FakeEmbeddings],
None,
None,
]
@ -46,6 +33,19 @@ def init_weaviate() -> (
cd tests/integration_tests/vectorstores/docker-compose
docker compose -f weaviate.yml up
"""
from docarray import BaseDoc
from docarray.index import (
WeaviateDocumentIndex,
)
class WeaviateDoc(BaseDoc):
# When initializing the Weaviate index, denote the field
# you want to search on with `is_embedding=True`
title: str
title_embedding: NdArray[32] = Field(is_embedding=True) # type: ignore
other_emb: NdArray[32] # type: ignore
year: int
embeddings = FakeEmbeddings(size=32)
# initialize WeaviateDocumentIndex
@ -76,12 +76,23 @@ def init_weaviate() -> (
@pytest.fixture
def init_elastic() -> (
Generator[Tuple[ElasticDocIndex[MyDoc], Dict[str, Any], FakeEmbeddings], None, None]
Generator[Tuple[ElasticDocIndex, Dict[str, Any], FakeEmbeddings], None, None]
):
"""
cd tests/integration_tests/vectorstores/docker-compose
docker-compose -f elasticsearch.yml up
"""
from docarray import BaseDoc
from docarray.index import (
ElasticDocIndex,
)
class MyDoc(BaseDoc):
title: str
title_embedding: NdArray[32] # type: ignore
other_emb: NdArray[32] # type: ignore
year: int
embeddings = FakeEmbeddings(size=32)
# initialize ElasticDocIndex
@ -109,7 +120,16 @@ def init_elastic() -> (
@pytest.fixture
def init_qdrant() -> Tuple[QdrantDocumentIndex[MyDoc], rest.Filter, FakeEmbeddings]:
def init_qdrant() -> Tuple[QdrantDocumentIndex, rest.Filter, FakeEmbeddings]:
from docarray import BaseDoc
from docarray.index import QdrantDocumentIndex
class MyDoc(BaseDoc):
title: str
title_embedding: NdArray[32] # type: ignore
other_emb: NdArray[32] # type: ignore
year: int
embeddings = FakeEmbeddings(size=32)
# initialize QdrantDocumentIndex
@ -144,9 +164,16 @@ def init_qdrant() -> Tuple[QdrantDocumentIndex[MyDoc], rest.Filter, FakeEmbeddin
@pytest.fixture
def init_in_memory() -> (
Tuple[InMemoryExactNNIndex[MyDoc], Dict[str, Any], FakeEmbeddings]
):
def init_in_memory() -> Tuple[InMemoryExactNNIndex, Dict[str, Any], FakeEmbeddings]:
from docarray import BaseDoc
from docarray.index import InMemoryExactNNIndex
class MyDoc(BaseDoc):
title: str
title_embedding: NdArray[32] # type: ignore
other_emb: NdArray[32] # type: ignore
year: int
embeddings = FakeEmbeddings(size=32)
# initialize InMemoryExactNNIndex
@ -172,7 +199,18 @@ def init_in_memory() -> (
@pytest.fixture
def init_hnsw(
tmp_path: Path,
) -> Tuple[HnswDocumentIndex[MyDoc], Dict[str, Any], FakeEmbeddings]:
) -> Tuple[HnswDocumentIndex, Dict[str, Any], FakeEmbeddings]:
from docarray import BaseDoc
from docarray.index import (
HnswDocumentIndex,
)
class MyDoc(BaseDoc):
title: str
title_embedding: NdArray[32] # type: ignore
other_emb: NdArray[32] # type: ignore
year: int
embeddings = FakeEmbeddings(size=32)
# initialize InMemoryExactNNIndex

View File

@ -1,7 +1,6 @@
from typing import Any
import pytest
from vcr.request import Request
from langchain.retrievers import DocArrayRetriever
from tests.integration_tests.retrievers.docarray.fixtures import ( # noqa: F401
@ -17,7 +16,7 @@ from tests.integration_tests.retrievers.docarray.fixtures import ( # noqa: F401
"backend",
["init_hnsw", "init_in_memory", "init_qdrant", "init_elastic", "init_weaviate"],
)
def test_backends(request: Request, backend: Any) -> None:
def test_backends(request: Any, backend: Any) -> None:
index, filter_query, embeddings = request.getfixturevalue(backend)
# create a retriever

View File

@ -6,7 +6,6 @@ from typing import Generator, Union
from uuid import uuid4
import pytest
from weaviate import Client
from langchain.docstore.document import Document
from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever
@ -28,6 +27,8 @@ class TestWeaviateHybridSearchRetriever:
@pytest.fixture(scope="class", autouse=True)
def weaviate_url(self) -> Union[str, Generator[str, None, None]]:
"""Return the weaviate url."""
from weaviate import Client
url = "http://localhost:8080"
yield url
@ -38,6 +39,8 @@ class TestWeaviateHybridSearchRetriever:
@pytest.mark.vcr(ignore_localhost=True)
def test_get_relevant_documents(self, weaviate_url: str) -> None:
"""Test end to end construction and MRR search."""
from weaviate import Client
texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))]
@ -64,6 +67,8 @@ class TestWeaviateHybridSearchRetriever:
@pytest.mark.vcr(ignore_localhost=True)
def test_get_relevant_documents_with_score(self, weaviate_url: str) -> None:
"""Test end to end construction and MRR search."""
from weaviate import Client
texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))]
@ -87,6 +92,8 @@ class TestWeaviateHybridSearchRetriever:
@pytest.mark.vcr(ignore_localhost=True)
def test_get_relevant_documents_with_filter(self, weaviate_url: str) -> None:
"""Test end to end construction and MRR search."""
from weaviate import Client
texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))]
@ -113,6 +120,8 @@ class TestWeaviateHybridSearchRetriever:
@pytest.mark.vcr(ignore_localhost=True)
def test_get_relevant_documents_with_uuids(self, weaviate_url: str) -> None:
"""Test end to end construction and MRR search."""
from weaviate import Client
texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))]
# Weaviate replaces the object if the UUID already exists

View File

@ -2,7 +2,6 @@ import uuid
from typing import Optional
import pytest
from qdrant_client.http import models as rest
from langchain.vectorstores import Qdrant
from tests.integration_tests.vectorstores.fake_embeddings import (
@ -69,6 +68,7 @@ async def test_qdrant_aadd_texts_stores_ids(
) -> None:
"""Test end to end Qdrant.aadd_texts stores provided ids."""
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
ids = [
"fa38d572-4c31-4579-aedc-1960d79df6df",
@ -101,6 +101,7 @@ async def test_qdrant_aadd_texts_stores_embeddings_as_named_vectors(
) -> None:
"""Test end to end Qdrant.aadd_texts stores named vectors if name is provided."""
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
collection_name = uuid.uuid4().hex

View File

@ -2,7 +2,6 @@ from typing import Optional
import numpy as np
import pytest
from qdrant_client.http import models as rest
from langchain.schema import Document
from langchain.vectorstores import Qdrant
@ -239,6 +238,8 @@ async def test_qdrant_similarity_search_filters_with_qdrant_filters(
qdrant_location: str,
) -> None:
"""Test end to end construction and search."""
from qdrant_client.http import models as rest
texts = ["foo", "bar", "baz"]
metadatas = [
{"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}

View File

@ -2,7 +2,6 @@ import uuid
from typing import Optional
import pytest
from qdrant_client.http import models as rest
from langchain.schema import Document
from langchain.vectorstores import Qdrant
@ -81,6 +80,7 @@ def test_qdrant_add_texts_stores_duplicated_texts(vector_name: Optional[str]) ->
def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
"""Test end to end Qdrant.add_texts stores provided ids."""
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
ids = [
"fa38d572-4c31-4579-aedc-1960d79df6df",
@ -107,6 +107,7 @@ def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
def test_qdrant_add_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
"""Test end to end Qdrant.add_texts stores named vectors if name is provided."""
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
collection_name = uuid.uuid4().hex

View File

@ -2,7 +2,6 @@ from typing import Optional
import numpy as np
import pytest
from qdrant_client.http import models as rest
from langchain.schema import Document
from langchain.vectorstores import Qdrant
@ -209,6 +208,8 @@ def test_qdrant_similarity_search_filters_with_qdrant_filters(
vector_name: Optional[str],
) -> None:
"""Test end to end construction and search."""
from qdrant_client.http import models as rest
texts = ["foo", "bar", "baz"]
metadatas = [
{"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}

View File

@ -1,7 +1,6 @@
import os
import time
import openai
import pytest
from dotenv import load_dotenv
@ -10,13 +9,7 @@ from langchain.vectorstores.azuresearch import AzureSearch
load_dotenv()
# Azure OpenAI settings
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE", "")
openai.api_version = "2023-05-15"
openai.api_key = os.getenv("OPENAI_API_KEY", "")
model: str = os.getenv("OPENAI_EMBEDDINGS_ENGINE_DOC", "text-embedding-ada-002")
model = os.getenv("OPENAI_EMBEDDINGS_ENGINE_DOC", "text-embedding-ada-002")
# Vector store settings
vector_store_address: str = os.getenv("AZURE_SEARCH_ENDPOINT", "")
vector_store_password: str = os.getenv("AZURE_SEARCH_ADMIN_KEY", "")

View File

@ -1,5 +1,4 @@
"""Test Deep Lake functionality."""
import deeplake
import pytest
from pytest import FixtureRequest
@ -53,6 +52,8 @@ def test_deeplake_with_metadatas() -> None:
def test_deeplakewith_persistence() -> None:
"""Test end to end construction and search, with persistence."""
import deeplake
dataset_path = "./tests/persist_dir"
if deeplake.exists(dataset_path):
deeplake.delete(dataset_path)
@ -84,6 +85,8 @@ def test_deeplakewith_persistence() -> None:
def test_deeplake_overwrite_flag() -> None:
"""Test overwrite behavior"""
import deeplake
dataset_path = "./tests/persist_dir"
if deeplake.exists(dataset_path):
deeplake.delete(dataset_path)
@ -234,6 +237,8 @@ def test_delete_dataset_by_filter(deeplake_datastore: DeepLake) -> None:
def test_delete_by_path(deeplake_datastore: DeepLake) -> None:
"""Test delete dataset."""
import deeplake
path = deeplake_datastore.dataset_path
DeepLake.force_delete_by_path(path)
assert not deeplake.exists(path)

View File

@ -5,7 +5,6 @@ import uuid
from typing import Generator, List, Union
import pytest
from elasticsearch import Elasticsearch
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
@ -29,6 +28,8 @@ class TestElasticsearch:
@pytest.fixture(scope="class", autouse=True)
def elasticsearch_url(self) -> Union[str, Generator[str, None, None]]:
"""Return the elasticsearch url."""
from elasticsearch import Elasticsearch
url = "http://localhost:9200"
yield url
es = Elasticsearch(hosts=url)
@ -108,6 +109,7 @@ class TestElasticsearch:
) -> None:
"""This test checks the construction of a custom
ElasticSearch index using the 'from_documents'."""
from elasticsearch import Elasticsearch
index_name = f"custom_index_{uuid.uuid4().hex}"
elastic_vector_search = ElasticVectorSearch.from_documents(
@ -134,6 +136,7 @@ class TestElasticsearch:
) -> None:
"""This test checks the construction of a custom
ElasticSearch index using the 'add_documents'."""
from elasticsearch import Elasticsearch
index_name = f"custom_index_{uuid.uuid4().hex}"
elastic_vector_search = ElasticVectorSearch(

View File

@ -1,10 +1,10 @@
import lancedb
from langchain.vectorstores import LanceDB
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
def test_lancedb() -> None:
import lancedb
embeddings = FakeEmbeddings()
db = lancedb.connect("/tmp/lancedb")
texts = ["text 1", "text 2", "item 3"]
@ -24,6 +24,8 @@ def test_lancedb() -> None:
def test_lancedb_add_texts() -> None:
import lancedb
embeddings = FakeEmbeddings()
db = lancedb.connect("/tmp/lancedb")
texts = ["text 1"]

View File

@ -1,7 +1,6 @@
"""Test Marqo functionality."""
from typing import Dict
import marqo
import pytest
from langchain.docstore.document import Document
@ -14,6 +13,8 @@ INDEX_NAME = "langchain-integration-tests"
@pytest.fixture
def client() -> Marqo:
import marqo
# fixture for marqo client to be used throughout testing, resets the index
client = marqo.Client(url=DEFAULT_MARQO_URL, api_key=DEFAULT_MARQO_API_KEY)
try:
@ -128,6 +129,8 @@ def test_marqo_weighted_query(client: Marqo) -> None:
def test_marqo_multimodal() -> None:
import marqo
client = marqo.Client(url=DEFAULT_MARQO_URL, api_key=DEFAULT_MARQO_API_KEY)
try:
client.index(INDEX_NAME).delete()

View File

@ -1,7 +1,6 @@
"""Test Meilisearch functionality."""
from typing import Generator
from typing import TYPE_CHECKING, Generator
import meilisearch
import pytest
import requests
@ -9,6 +8,9 @@ from langchain.docstore.document import Document
from langchain.vectorstores import Meilisearch
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
if TYPE_CHECKING:
import meilisearch
INDEX_NAME = "test-langchain-demo"
TEST_MEILI_HTTP_ADDR = "http://localhost:7700"
TEST_MEILI_MASTER_KEY = "masterKey"
@ -49,7 +51,9 @@ class TestMeilisearchVectorSearch:
task = client.index(index.uid).delete()
client.wait_for_task(task.task_uid)
def client(self) -> meilisearch.Client:
def client(self) -> "meilisearch.Client":
import meilisearch
return meilisearch.Client(TEST_MEILI_HTTP_ADDR, TEST_MEILI_MASTER_KEY)
def _wait_last_task(self) -> None:
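
When the dependency's types also appear in annotations, the commit pairs the deferred import with a TYPE_CHECKING guard: the name is visible to type checkers, while the runtime import stays inside the function. A condensed sketch of the shape used here (the function name is illustrative):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import meilisearch  # seen only by type checkers, never imported at runtime


def make_client() -> "meilisearch.Client":
    # The string annotation resolves via the guard above; the real import
    # is deferred until the function actually runs.
    import meilisearch

    return meilisearch.Client("http://localhost:7700", "masterKey")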

View File

@ -3,7 +3,7 @@ from __future__ import annotations
import os
from time import sleep
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any
import pytest
@ -21,23 +21,27 @@ DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
# Instantiate as constant instead of pytest fixture to prevent needing to make multiple
# connections.
TEST_CLIENT: MongoClient = MongoClient(CONNECTION_STRING)
collection = TEST_CLIENT[DB_NAME][COLLECTION_NAME]
@pytest.fixture
def collection() -> Any:
test_client = MongoClient(CONNECTION_STRING)
return test_client[DB_NAME][COLLECTION_NAME]
class TestMongoDBAtlasVectorSearch:
@classmethod
def setup_class(cls) -> None:
def setup_class(cls, collection: Any) -> None:
# ensure the test collection is empty
assert collection.count_documents({}) == 0 # type: ignore[index] # noqa: E501
@classmethod
def teardown_class(cls) -> None:
def teardown_class(cls, collection: Any) -> None:
# delete all the documents in the collection
collection.delete_many({}) # type: ignore[index]
@pytest.fixture(autouse=True)
def setup(self) -> None:
def setup(self, collection: Any) -> None:
# delete all the documents in the collection
collection.delete_many({}) # type: ignore[index]

View File

@ -1,8 +1,6 @@
"""Test OpenSearch functionality."""
import boto3
import pytest
from opensearchpy import AWSV4SignerAuth
from langchain.docstore.document import Document
from langchain.vectorstores.opensearch_vector_search import (
@ -219,6 +217,9 @@ def test_opensearch_with_custom_field_name_appx_false() -> None:
def test_opensearch_serverless_with_scripting_search_indexing_throws_error() -> None:
"""Test to validate indexing using Serverless without Approximate Search."""
import boto3
from opensearchpy import AWSV4SignerAuth
region = "test-region"
service = "aoss"
credentials = boto3.Session().get_credentials()
@ -235,6 +236,9 @@ def test_opensearch_serverless_with_scripting_search_indexing_throws_error() ->
def test_opensearch_serverless_with_lucene_engine_throws_error() -> None:
"""Test to validate indexing using lucene engine with Serverless."""
import boto3
from opensearchpy import AWSV4SignerAuth
region = "test-region"
service = "aoss"
credentials = boto3.Session().get_credentials()

View File

@ -2,16 +2,18 @@ import importlib
import os
import time
import uuid
from typing import List
from typing import TYPE_CHECKING, List
import numpy as np
import pinecone
import pytest
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.pinecone import Pinecone
if TYPE_CHECKING:
import pinecone
index_name = "langchain-test-index" # name of the index
namespace_name = "langchain-test-namespace" # name of the namespace
dimension = 1536 # dimension of the embeddings
@ -32,10 +34,12 @@ def reset_pinecone() -> None:
class TestPinecone:
index: pinecone.Index
index: "pinecone.Index"
@classmethod
def setup_class(cls) -> None:
import pinecone
reset_pinecone()
cls.index = pinecone.Index(index_name)

View File

@ -1,9 +1,6 @@
import logging
import os
import rockset
import rockset.models
from langchain.docstore.document import Document
from langchain.vectorstores.rocksetdb import Rockset
from tests.integration_tests.vectorstores.fake_embeddings import (
@ -44,6 +41,9 @@ class TestRockset:
@classmethod
def setup_class(cls) -> None:
import rockset
import rockset.models
assert os.environ.get("ROCKSET_API_KEY") is not None
assert os.environ.get("ROCKSET_REGION") is not None

View File

@ -5,7 +5,6 @@ import uuid
from typing import Generator, Union
import pytest
from weaviate import Client
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
@ -29,6 +28,8 @@ class TestWeaviate:
@pytest.fixture(scope="class", autouse=True)
def weaviate_url(self) -> Union[str, Generator[str, None, None]]:
"""Return the weaviate url."""
from weaviate import Client
url = "http://localhost:8080"
yield url