Mirror of https://github.com/hwchase17/langchain.git, synced 2025-06-20 22:03:52 +00:00
scheduled tests GHA (#8879)
Adding a scheduled daily GHA that runs marked integration tests. To start, just marking some tests in test_openai.
This commit is contained in:
parent 8f0cd91d57
commit 95cf7de112
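The mechanism, in short: tests opted into the nightly run carry a pytest marker, the marker is registered in pyproject.toml, a new Makefile target selects it, and a new workflow runs that target on a daily cron. A minimal sketch of a marked test (the test name here is a placeholder, not part of the diff):

import pytest


@pytest.mark.scheduled  # selected by `pytest -m scheduled`
def test_smoke() -> None:
    assert True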
@@ -1,5 +1,5 @@
 ---
-name: libs/langchain-experimental CI
+name: libs/experimental CI
 
 on:
   push:
@@ -1,5 +1,5 @@
 ---
-name: libs/langchain-experimental Release
+name: libs/experimental Release
 
 on:
   pull_request:
38  .github/workflows/scheduled_test.yml  (vendored, new file)
@@ -0,0 +1,38 @@
+name: Scheduled tests
+
+on:
+  scheduled:
+    - cron: '0 13 * * *'
+
+env:
+  POETRY_VERSION: "1.4.2"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    environment: Scheduled testing
+    strategy:
+      matrix:
+        python-version:
+          - "3.8"
+          - "3.9"
+          - "3.10"
+          - "3.11"
+    name: Python ${{ matrix.python-version }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: "./.github/actions/poetry_setup"
+        with:
+          python-version: ${{ matrix.python-version }}
+          poetry-version: "1.4.2"
+          install-command: |
+            echo "Running scheduled tests, installing dependencies with poetry..."
+            poetry install -E scheduled_testing
+      - name: Run tests
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          make scheduled_tests
+        shell: bash
+    secrets: inherit
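The cron expression '0 13 * * *' fires once a day at 13:00 UTC, and `poetry install -E scheduled_testing` installs the new extras group defined in pyproject.toml further down. A rough local equivalent of the workflow's "Run tests" step, sketched in Python (assumes poetry is on PATH and the Makefile target below exists):

import subprocess

# Mirrors `make scheduled_tests`; raises CalledProcessError if any test fails.
subprocess.run(
    ["poetry", "run", "pytest", "-m", "scheduled", "tests/integration_tests"],
    check=True,
)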
@@ -7,6 +7,10 @@ from unittest import mock
 
 import pydantic
 import pytest
+from langchain import OpenAI
+from langchain.output_parsers import PydanticOutputParser
+from langchain.prompts.prompt import PromptTemplate
+
 from langchain_experimental.cpal.base import (
     CausalChain,
     CPALChain,
@@ -35,10 +39,6 @@ from langchain_experimental.cpal.templates.univariate.narrative import (
 from langchain_experimental.cpal.templates.univariate.query import (
     template as query_template,
 )
 
-from langchain import OpenAI
-from langchain.output_parsers import PydanticOutputParser
-from langchain.prompts.prompt import PromptTemplate
-
 from tests.unit_tests.llms.fake_llm import FakeLLM
 
@@ -1,12 +1,11 @@
 """Test SQL Database Chain."""
-from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert
-
-from langchain.chains.sql_database.base import (
+from langchain.llms.openai import OpenAI
+from langchain.utilities.sql_database import SQLDatabase
+from libs.experimental.langchain_experimental.sql.base import (
     SQLDatabaseChain,
     SQLDatabaseSequentialChain,
 )
-from langchain.llms.openai import OpenAI
-from langchain.utilities.sql_database import SQLDatabase
+from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert
 
 
 metadata_obj = MetaData()
@@ -59,6 +59,9 @@ test_watch:
 integration_tests:
	poetry run pytest tests/integration_tests
 
+scheduled_tests:
+	poetry run pytest -m scheduled tests/integration_tests
+
 docker_tests:
	docker build -t my-langchain-image:test .
	docker run --rm my-langchain-image:test
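Running pytest with `-m scheduled` still collects the whole directory but deselects every test that does not carry the marker, so the nightly job exercises only the opted-in subset. A sketch with two hypothetical tests:

import pytest


@pytest.mark.scheduled
def test_runs_nightly() -> None:  # collected under `pytest -m scheduled`
    assert 1 + 1 == 2


def test_runs_in_regular_ci() -> None:  # deselected under `pytest -m scheduled`
    assert "a".upper() == "A"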
588  libs/langchain/poetry.lock  (generated)
File diff suppressed because it is too large.
@@ -375,6 +375,10 @@ extended_testing = [
     "xmltodict",
 ]
 
+scheduled_testing = [
+    "openai",
+]
+
 [tool.ruff]
 select = [
     "E",  # pycodestyle
@@ -413,7 +417,8 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused
 # Registering custom markers.
 # https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
 markers = [
-    "requires: mark tests as requiring a specific library"
+    "requires: mark tests as requiring a specific library",
+    "scheduled: mark tests to run in scheduled testing",
 ]
 
 [tool.codespell]
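Because addopts already passes --strict-markers, an unregistered marker is a hard error rather than a warning, so the new `scheduled` entry in `markers` is required, not cosmetic. The same registration could instead live in a conftest.py hook; a sketch of that equivalent (not what this commit does):

def pytest_configure(config) -> None:
    # Equivalent of the `scheduled` entry in the pyproject.toml `markers` list.
    config.addinivalue_line(
        "markers", "scheduled: mark tests to run in scheduled testing"
    )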
@@ -18,7 +18,7 @@ except ImportError:
     gptcache_installed = False
 
 
-def init_gptcache_map(cache_obj: Cache) -> None:
+def init_gptcache_map(cache_obj: Any) -> None:
     i = getattr(init_gptcache_map, "_i", 0)
     cache_path = f"data_map_{i}.txt"
     if os.path.isfile(cache_path):
@@ -30,7 +30,7 @@ def init_gptcache_map(cache_obj: Cache) -> None:
     init_gptcache_map._i = i + 1  # type: ignore
 
 
-def init_gptcache_map_with_llm(cache_obj: Cache, llm: str) -> None:
+def init_gptcache_map_with_llm(cache_obj: Any, llm: str) -> None:
     cache_path = f"data_map_{llm}.txt"
     if os.path.isfile(cache_path):
         os.remove(cache_path)
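Loosening `cache_obj: Cache` to `Any` removes the last module-level dependence on gptcache, so the file can be imported (and therefore collected) even when the library is absent; the guarded import at the top of the file already records whether it is installed. The overall shape of the pattern, as a sketch rather than the full test file:

from typing import Any

try:
    import gptcache  # noqa: F401

    gptcache_installed = True
except ImportError:
    gptcache_installed = False


def init_gptcache_map(cache_obj: Any) -> None:  # Any instead of gptcache.Cache
    pass  # real body elided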
@@ -11,7 +11,6 @@ from datetime import timedelta
 from typing import Iterator
 
 import pytest
-from momento import CacheClient, Configurations, CredentialProvider
 
 import langchain
 from langchain.cache import MomentoCache
@@ -25,6 +24,8 @@ def random_string() -> str:
 
 @pytest.fixture(scope="module")
 def momento_cache() -> Iterator[MomentoCache]:
+    from momento import CacheClient, Configurations, CredentialProvider
+
     cache_name = f"langchain-test-cache-{random_string()}"
     client = CacheClient(
         Configurations.Laptop.v1(),
@@ -40,6 +41,8 @@ def momento_cache() -> Iterator[MomentoCache]:
 
 
 def test_invalid_ttl() -> None:
+    from momento import CacheClient, Configurations, CredentialProvider
+
     client = CacheClient(
         Configurations.Laptop.v1(),
         CredentialProvider.from_environment_variable("MOMENTO_AUTH_TOKEN"),
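This hunk shows the pattern repeated through most of the commit: third-party imports move from module level into the fixture or test body, so merely collecting the suite (which `pytest -m scheduled` must do before deselecting anything) no longer requires every optional dependency to be installed. In miniature, with a hypothetical fixture name:

from datetime import timedelta
from typing import Any

import pytest


@pytest.fixture
def cache_client() -> Any:
    # Deferred import: fails only when a test that uses this fixture runs,
    # not when the module is imported during collection.
    from momento import CacheClient, Configurations, CredentialProvider

    return CacheClient(
        Configurations.Laptop.v1(),
        CredentialProvider.from_environment_variable("MOMENTO_AUTH_TOKEN"),
        default_ttl=timedelta(seconds=30),
    )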
@@ -1,6 +1,5 @@
 """Test Redis cache functionality."""
 import pytest
-import redis
 
 import langchain
 from langchain.cache import RedisCache, RedisSemanticCache
@@ -13,6 +12,8 @@ REDIS_TEST_URL = "redis://localhost:6379"
 
 
 def test_redis_cache() -> None:
+    import redis
+
     langchain.llm_cache = RedisCache(redis_=redis.Redis.from_url(REDIS_TEST_URL))
     llm = FakeLLM()
     params = llm.dict()
@@ -31,6 +32,8 @@ def test_redis_cache() -> None:
 
 
 def test_redis_cache_chat() -> None:
+    import redis
+
     langchain.llm_cache = RedisCache(redis_=redis.Redis.from_url(REDIS_TEST_URL))
     llm = FakeChatModel()
     params = llm.dict()
@@ -69,6 +72,8 @@ def test_redis_semantic_cache() -> None:
 
 
 def test_redis_semantic_cache_chat() -> None:
+    import redis
+
     langchain.llm_cache = RedisCache(redis_=redis.Redis.from_url(REDIS_TEST_URL))
     llm = FakeChatModel()
     params = llm.dict()
@@ -2,8 +2,6 @@ import os
 from contextlib import ExitStack
 from pathlib import Path
 
-from unstructured.cleaners.core import clean_extra_whitespace
-
 from langchain.document_loaders import (
     UnstructuredAPIFileIOLoader,
     UnstructuredAPIFileLoader,
@@ -14,6 +12,8 @@ EXAMPLE_DOCS_DIRECTORY = str(Path(__file__).parent.parent / "examples/")
 
 
 def test_unstructured_loader_with_post_processor() -> None:
+    from unstructured.cleaners.core import clean_extra_whitespace
+
     file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "layout-parser-paper.pdf")
     loader = UnstructuredFileLoader(
         file_path=file_path,
@@ -1,15 +1,13 @@
 """Test Baseten API wrapper."""
 import os
 
-import baseten
-import pytest
-
 from langchain.llms.baseten import Baseten
 
 
-@pytest.mark.requires(baseten)
 def test_baseten_call() -> None:
     """Test valid call to Baseten."""
+    import baseten
+
     baseten.login(os.environ["BASETEN_API_KEY"])
     llm = Baseten(model=os.environ["BASETEN_MODEL_ID"])
     output = llm("Say foo:")
@@ -7,16 +7,17 @@ import pytest
 from langchain.callbacks.manager import CallbackManager
 from langchain.chat_models.openai import ChatOpenAI
 from langchain.llms.loading import load_llm
-from langchain.llms.openai import OpenAI, OpenAIChat
+from langchain.llms.openai import OpenAI
 from langchain.schema import LLMResult
 from tests.unit_tests.callbacks.fake_callback_handler import (
     FakeCallbackHandler,
 )
 
 
+@pytest.mark.scheduled
 def test_openai_call() -> None:
     """Test valid call to openai."""
-    llm = OpenAI(max_tokens=10, n=3)
+    llm = OpenAI()
     output = llm("Say something nice:")
     assert isinstance(output, str)
 
@@ -86,6 +87,7 @@ def test_saving_loading_llm(tmp_path: Path) -> None:
     assert loaded_llm == llm
 
 
+@pytest.mark.scheduled
 def test_openai_streaming() -> None:
     """Test streaming tokens from OpenAI."""
     llm = OpenAI(max_tokens=10)
@@ -97,6 +99,7 @@ def test_openai_streaming() -> None:
         assert isinstance(token, str)
 
 
+@pytest.mark.scheduled
 @pytest.mark.asyncio
 async def test_openai_astream() -> None:
     """Test streaming tokens from OpenAI."""
@@ -106,6 +109,7 @@ async def test_openai_astream() -> None:
         assert isinstance(token, str)
 
 
+@pytest.mark.scheduled
 @pytest.mark.asyncio
 async def test_openai_abatch() -> None:
     """Test streaming tokens from OpenAI."""
@@ -128,6 +132,7 @@ async def test_openai_abatch_tags() -> None:
         assert isinstance(token, str)
 
 
+@pytest.mark.scheduled
 def test_openai_batch() -> None:
     """Test streaming tokens from OpenAI."""
     llm = OpenAI(max_tokens=10)
@@ -137,6 +142,7 @@ def test_openai_batch() -> None:
         assert isinstance(token, str)
 
 
+@pytest.mark.scheduled
 @pytest.mark.asyncio
 async def test_openai_ainvoke() -> None:
     """Test streaming tokens from OpenAI."""
@@ -146,6 +152,7 @@ async def test_openai_ainvoke() -> None:
     assert isinstance(result, str)
 
 
+@pytest.mark.scheduled
 def test_openai_invoke() -> None:
     """Test streaming tokens from OpenAI."""
     llm = OpenAI(max_tokens=10)
@@ -154,6 +161,7 @@ def test_openai_invoke() -> None:
     assert isinstance(result, str)
 
 
+@pytest.mark.scheduled
 def test_openai_multiple_prompts() -> None:
     """Test completion with multiple prompts."""
     llm = OpenAI(max_tokens=10)
@@ -181,6 +189,7 @@ def test_openai_streaming_multiple_prompts_error() -> None:
         OpenAI(streaming=True).generate(["I'm Pickle Rick", "I'm Pickle Rick"])
 
 
+@pytest.mark.scheduled
 def test_openai_streaming_call() -> None:
     """Test valid call to openai."""
     llm = OpenAI(max_tokens=10, streaming=True)
@@ -203,6 +212,7 @@ def test_openai_streaming_callback() -> None:
     assert callback_handler.llm_streams == 10
 
 
+@pytest.mark.scheduled
 @pytest.mark.asyncio
 async def test_openai_async_generate() -> None:
     """Test async generation."""
@@ -235,60 +245,6 @@ def test_openai_chat_wrong_class() -> None:
     assert isinstance(output, str)
 
 
-def test_openai_chat() -> None:
-    """Test OpenAIChat."""
-    llm = OpenAIChat(max_tokens=10)
-    output = llm("Say foo:")
-    assert isinstance(output, str)
-
-
-def test_openai_chat_streaming() -> None:
-    """Test OpenAIChat with streaming option."""
-    llm = OpenAIChat(max_tokens=10, streaming=True)
-    output = llm("Say foo:")
-    assert isinstance(output, str)
-
-
-def test_openai_chat_streaming_callback() -> None:
-    """Test that streaming correctly invokes on_llm_new_token callback."""
-    callback_handler = FakeCallbackHandler()
-    callback_manager = CallbackManager([callback_handler])
-    llm = OpenAIChat(
-        max_tokens=10,
-        streaming=True,
-        temperature=0,
-        callback_manager=callback_manager,
-        verbose=True,
-    )
-    llm("Write me a sentence with 100 words.")
-    assert callback_handler.llm_streams != 0
-
-
-@pytest.mark.asyncio
-async def test_openai_chat_async_generate() -> None:
-    """Test async chat."""
-    llm = OpenAIChat(max_tokens=10)
-    output = await llm.agenerate(["Hello, how are you?"])
-    assert isinstance(output, LLMResult)
-
-
-@pytest.mark.asyncio
-async def test_openai_chat_async_streaming_callback() -> None:
-    """Test that streaming correctly invokes on_llm_new_token callback."""
-    callback_handler = FakeCallbackHandler()
-    callback_manager = CallbackManager([callback_handler])
-    llm = OpenAIChat(
-        max_tokens=10,
-        streaming=True,
-        temperature=0,
-        callback_manager=callback_manager,
-        verbose=True,
-    )
-    result = await llm.agenerate(["Write me a sentence with 100 words."])
-    assert callback_handler.llm_streams != 0
-    assert isinstance(result, LLMResult)
-
-
 def test_openai_modelname_to_contextsize_valid() -> None:
     """Test model name to context size on a valid model."""
     assert OpenAI().modelname_to_contextsize("davinci") == 2049
@@ -6,8 +6,8 @@ from langchain.memory.chat_message_histories import CosmosDBChatMessageHistory
 from langchain.schema.messages import _message_to_dict
 
 # Replace these with your Azure Cosmos DB endpoint and key
-endpoint = os.environ["COSMOS_DB_ENDPOINT"]
-credential = os.environ["COSMOS_DB_KEY"]
+endpoint = os.environ.get("COSMOS_DB_ENDPOINT", "")
+credential = os.environ.get("COSMOS_DB_KEY", "")
 
 
 def test_memory_with_message_store() -> None:
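Same motive as the import moves: a module-level `os.environ["COSMOS_DB_ENDPOINT"]` raises KeyError during collection whenever the variable is unset, while `.get(..., "")` defers any failure to the test that actually uses the value. Side by side:

import os

# Raises KeyError at import (i.e. collection) time if the variable is unset:
#     endpoint = os.environ["COSMOS_DB_ENDPOINT"]
# Collection-safe; a test relying on the empty string fails later, at run time:
endpoint = os.environ.get("COSMOS_DB_ENDPOINT", "")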
@@ -10,7 +10,6 @@ from datetime import timedelta
 from typing import Iterator
 
 import pytest
-from momento import CacheClient, Configurations, CredentialProvider
 
 from langchain.memory import ConversationBufferMemory
 from langchain.memory.chat_message_histories import MomentoChatMessageHistory
@@ -23,6 +22,8 @@ def random_string() -> str:
 
 @pytest.fixture(scope="function")
 def message_history() -> Iterator[MomentoChatMessageHistory]:
+    from momento import CacheClient, Configurations, CredentialProvider
+
     cache_name = f"langchain-test-cache-{random_string()}"
     client = CacheClient(
         Configurations.Laptop.v1(),
@@ -6,7 +6,7 @@ from langchain.memory.chat_message_histories import MongoDBChatMessageHistory
 from langchain.schema.messages import _message_to_dict
 
 # Replace these with your mongodb connection string
-connection_string = os.environ["MONGODB_CONNECTION_STRING"]
+connection_string = os.environ.get("MONGODB_CONNECTION_STRING", "")
 
 
 def test_memory_with_message_store() -> None:
@@ -1,9 +1,13 @@
+from __future__ import annotations
+
 from pathlib import Path
-from typing import Any, Dict, Generator, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Generator, Tuple
 
 import numpy as np
 import pytest
-from docarray import BaseDoc
+from pydantic import Field
 
-from docarray.index import (
-    ElasticDocIndex,
-    HnswDocumentIndex,
+if TYPE_CHECKING:
+    from docarray.index import (
+        ElasticDocIndex,
+        HnswDocumentIndex,
@@ -12,18 +16,27 @@ from docarray.index import (
-    WeaviateDocumentIndex,
-)
-from docarray.typing import NdArray
-from pydantic import Field
-from qdrant_client.http import models as rest
+        WeaviateDocumentIndex,
+    )
+    from docarray.typing import NdArray
+    from qdrant_client.http import models as rest
 
 from langchain.embeddings import FakeEmbeddings
 
 
-class MyDoc(BaseDoc):
-    title: str
-    title_embedding: NdArray[32]  # type: ignore
-    other_emb: NdArray[32]  # type: ignore
-    year: int
+@pytest.fixture
+def init_weaviate() -> (
+    Generator[
+        Tuple[WeaviateDocumentIndex, Dict[str, Any], FakeEmbeddings],
+        None,
+        None,
+    ]
+):
+    """
+    cd tests/integration_tests/vectorstores/docker-compose
+    docker compose -f weaviate.yml up
+    """
+    from docarray import BaseDoc
+    from docarray.index import (
+        WeaviateDocumentIndex,
+    )
 
     class WeaviateDoc(BaseDoc):
         # When initializing the Weaviate index, denote the field
@@ -33,19 +46,6 @@ class WeaviateDoc(BaseDoc):
     other_emb: NdArray[32]  # type: ignore
     year: int
 
-
-@pytest.fixture
-def init_weaviate() -> (
-    Generator[
-        Tuple[WeaviateDocumentIndex[WeaviateDoc], Dict[str, Any], FakeEmbeddings],
-        None,
-        None,
-    ]
-):
-    """
-    cd tests/integration_tests/vectorstores/docker-compose
-    docker compose -f weaviate.yml up
-    """
     embeddings = FakeEmbeddings(size=32)
 
     # initialize WeaviateDocumentIndex
@@ -76,12 +76,23 @@ def init_weaviate() -> (
 
 @pytest.fixture
 def init_elastic() -> (
-    Generator[Tuple[ElasticDocIndex[MyDoc], Dict[str, Any], FakeEmbeddings], None, None]
+    Generator[Tuple[ElasticDocIndex, Dict[str, Any], FakeEmbeddings], None, None]
 ):
     """
     cd tests/integration_tests/vectorstores/docker-compose
     docker-compose -f elasticsearch.yml up
     """
+    from docarray import BaseDoc
+    from docarray.index import (
+        ElasticDocIndex,
+    )
+
+    class MyDoc(BaseDoc):
+        title: str
+        title_embedding: NdArray[32]  # type: ignore
+        other_emb: NdArray[32]  # type: ignore
+        year: int
+
     embeddings = FakeEmbeddings(size=32)
 
     # initialize ElasticDocIndex
@@ -109,7 +120,16 @@ def init_elastic() -> (
 
 
 @pytest.fixture
-def init_qdrant() -> Tuple[QdrantDocumentIndex[MyDoc], rest.Filter, FakeEmbeddings]:
+def init_qdrant() -> Tuple[QdrantDocumentIndex, rest.Filter, FakeEmbeddings]:
+    from docarray import BaseDoc
+    from docarray.index import QdrantDocumentIndex
+
+    class MyDoc(BaseDoc):
+        title: str
+        title_embedding: NdArray[32]  # type: ignore
+        other_emb: NdArray[32]  # type: ignore
+        year: int
+
     embeddings = FakeEmbeddings(size=32)
 
     # initialize QdrantDocumentIndex
@@ -144,9 +164,16 @@ def init_qdrant() -> Tuple[QdrantDocumentIndex[MyDoc], rest.Filter, FakeEmbeddin
 
 
 @pytest.fixture
-def init_in_memory() -> (
-    Tuple[InMemoryExactNNIndex[MyDoc], Dict[str, Any], FakeEmbeddings]
-):
+def init_in_memory() -> Tuple[InMemoryExactNNIndex, Dict[str, Any], FakeEmbeddings]:
+    from docarray import BaseDoc
+    from docarray.index import InMemoryExactNNIndex
+
+    class MyDoc(BaseDoc):
+        title: str
+        title_embedding: NdArray[32]  # type: ignore
+        other_emb: NdArray[32]  # type: ignore
+        year: int
+
     embeddings = FakeEmbeddings(size=32)
 
     # initialize InMemoryExactNNIndex
@@ -172,7 +199,18 @@ def init_in_memory() -> (
 @pytest.fixture
 def init_hnsw(
     tmp_path: Path,
-) -> Tuple[HnswDocumentIndex[MyDoc], Dict[str, Any], FakeEmbeddings]:
+) -> Tuple[HnswDocumentIndex, Dict[str, Any], FakeEmbeddings]:
+    from docarray import BaseDoc
+    from docarray.index import (
+        HnswDocumentIndex,
+    )
+
+    class MyDoc(BaseDoc):
+        title: str
+        title_embedding: NdArray[32]  # type: ignore
+        other_emb: NdArray[32]  # type: ignore
+        year: int
+
     embeddings = FakeEmbeddings(size=32)
 
     # initialize InMemoryExactNNIndex
@@ -1,7 +1,6 @@
 from typing import Any
 
 import pytest
-from vcr.request import Request
 
 from langchain.retrievers import DocArrayRetriever
 from tests.integration_tests.retrievers.docarray.fixtures import (  # noqa: F401
@@ -17,7 +16,7 @@ from tests.integration_tests.retrievers.docarray.fixtures import (  # noqa: F401
     "backend",
     ["init_hnsw", "init_in_memory", "init_qdrant", "init_elastic", "init_weaviate"],
 )
-def test_backends(request: Request, backend: Any) -> None:
+def test_backends(request: Any, backend: Any) -> None:
     index, filter_query, embeddings = request.getfixturevalue(backend)
 
     # create a retriever
@@ -6,7 +6,6 @@ from typing import Generator, Union
 from uuid import uuid4
 
 import pytest
-from weaviate import Client
 
 from langchain.docstore.document import Document
 from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever
@@ -28,6 +27,8 @@ class TestWeaviateHybridSearchRetriever:
     @pytest.fixture(scope="class", autouse=True)
     def weaviate_url(self) -> Union[str, Generator[str, None, None]]:
         """Return the weaviate url."""
+        from weaviate import Client
+
         url = "http://localhost:8080"
         yield url
 
@@ -38,6 +39,8 @@ class TestWeaviateHybridSearchRetriever:
     @pytest.mark.vcr(ignore_localhost=True)
     def test_get_relevant_documents(self, weaviate_url: str) -> None:
         """Test end to end construction and MRR search."""
+        from weaviate import Client
+
         texts = ["foo", "bar", "baz"]
         metadatas = [{"page": i} for i in range(len(texts))]
 
@@ -64,6 +67,8 @@ class TestWeaviateHybridSearchRetriever:
     @pytest.mark.vcr(ignore_localhost=True)
     def test_get_relevant_documents_with_score(self, weaviate_url: str) -> None:
         """Test end to end construction and MRR search."""
+        from weaviate import Client
+
         texts = ["foo", "bar", "baz"]
         metadatas = [{"page": i} for i in range(len(texts))]
 
@@ -87,6 +92,8 @@ class TestWeaviateHybridSearchRetriever:
     @pytest.mark.vcr(ignore_localhost=True)
     def test_get_relevant_documents_with_filter(self, weaviate_url: str) -> None:
         """Test end to end construction and MRR search."""
+        from weaviate import Client
+
         texts = ["foo", "bar", "baz"]
         metadatas = [{"page": i} for i in range(len(texts))]
 
@@ -113,6 +120,8 @@ class TestWeaviateHybridSearchRetriever:
     @pytest.mark.vcr(ignore_localhost=True)
     def test_get_relevant_documents_with_uuids(self, weaviate_url: str) -> None:
         """Test end to end construction and MRR search."""
+        from weaviate import Client
+
         texts = ["foo", "bar", "baz"]
         metadatas = [{"page": i} for i in range(len(texts))]
         # Weaviate replaces the object if the UUID already exists
@@ -2,7 +2,6 @@ import uuid
 from typing import Optional
 
 import pytest
-from qdrant_client.http import models as rest
 
 from langchain.vectorstores import Qdrant
 from tests.integration_tests.vectorstores.fake_embeddings import (
@@ -69,6 +68,7 @@ async def test_qdrant_aadd_texts_stores_ids(
 ) -> None:
     """Test end to end Qdrant.aadd_texts stores provided ids."""
     from qdrant_client import QdrantClient
+    from qdrant_client.http import models as rest
 
     ids = [
         "fa38d572-4c31-4579-aedc-1960d79df6df",
@@ -101,6 +101,7 @@ async def test_qdrant_aadd_texts_stores_embeddings_as_named_vectors(
 ) -> None:
     """Test end to end Qdrant.aadd_texts stores named vectors if name is provided."""
     from qdrant_client import QdrantClient
+    from qdrant_client.http import models as rest
 
     collection_name = uuid.uuid4().hex
 
@@ -2,7 +2,6 @@ from typing import Optional
 
 import numpy as np
 import pytest
-from qdrant_client.http import models as rest
 
 from langchain.schema import Document
 from langchain.vectorstores import Qdrant
@@ -239,6 +238,8 @@ async def test_qdrant_similarity_search_filters_with_qdrant_filters(
     qdrant_location: str,
 ) -> None:
     """Test end to end construction and search."""
+    from qdrant_client.http import models as rest
+
     texts = ["foo", "bar", "baz"]
     metadatas = [
         {"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}
@@ -2,7 +2,6 @@ import uuid
 from typing import Optional
 
 import pytest
-from qdrant_client.http import models as rest
 
 from langchain.schema import Document
 from langchain.vectorstores import Qdrant
@@ -81,6 +80,7 @@ def test_qdrant_add_texts_stores_duplicated_texts(vector_name: Optional[str]) ->
 def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
     """Test end to end Qdrant.add_texts stores provided ids."""
     from qdrant_client import QdrantClient
+    from qdrant_client.http import models as rest
 
     ids = [
         "fa38d572-4c31-4579-aedc-1960d79df6df",
@@ -107,6 +107,7 @@ def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
 def test_qdrant_add_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
     """Test end to end Qdrant.add_texts stores named vectors if name is provided."""
     from qdrant_client import QdrantClient
+    from qdrant_client.http import models as rest
 
     collection_name = uuid.uuid4().hex
 
@@ -2,7 +2,6 @@ from typing import Optional
 
 import numpy as np
 import pytest
-from qdrant_client.http import models as rest
 
 from langchain.schema import Document
 from langchain.vectorstores import Qdrant
@@ -209,6 +208,8 @@ def test_qdrant_similarity_search_filters_with_qdrant_filters(
     vector_name: Optional[str],
 ) -> None:
     """Test end to end construction and search."""
+    from qdrant_client.http import models as rest
+
     texts = ["foo", "bar", "baz"]
     metadatas = [
         {"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}
@@ -1,7 +1,6 @@
 import os
 import time
 
-import openai
 import pytest
 from dotenv import load_dotenv
 
@@ -10,13 +9,7 @@ from langchain.vectorstores.azuresearch import AzureSearch
 load_dotenv()
 
-# Azure OpenAI settings
-openai.api_type = "azure"
-openai.api_base = os.getenv("OPENAI_API_BASE", "")
-openai.api_version = "2023-05-15"
-openai.api_key = os.getenv("OPENAI_API_KEY", "")
-model: str = os.getenv("OPENAI_EMBEDDINGS_ENGINE_DOC", "text-embedding-ada-002")
+model = os.getenv("OPENAI_EMBEDDINGS_ENGINE_DOC", "text-embedding-ada-002")
 
 # Vector store settings
 vector_store_address: str = os.getenv("AZURE_SEARCH_ENDPOINT", "")
 vector_store_password: str = os.getenv("AZURE_SEARCH_ADMIN_KEY", "")
@@ -1,5 +1,4 @@
 """Test Deep Lake functionality."""
-import deeplake
 import pytest
 from pytest import FixtureRequest
 
@@ -53,6 +52,8 @@ def test_deeplake_with_metadatas() -> None:
 
 def test_deeplakewith_persistence() -> None:
     """Test end to end construction and search, with persistence."""
+    import deeplake
+
     dataset_path = "./tests/persist_dir"
     if deeplake.exists(dataset_path):
         deeplake.delete(dataset_path)
@@ -84,6 +85,8 @@ def test_deeplakewith_persistence() -> None:
 
 def test_deeplake_overwrite_flag() -> None:
     """Test overwrite behavior"""
+    import deeplake
+
     dataset_path = "./tests/persist_dir"
     if deeplake.exists(dataset_path):
         deeplake.delete(dataset_path)
@@ -234,6 +237,8 @@ def test_delete_dataset_by_filter(deeplake_datastore: DeepLake) -> None:
 
 def test_delete_by_path(deeplake_datastore: DeepLake) -> None:
     """Test delete dataset."""
+    import deeplake
+
     path = deeplake_datastore.dataset_path
     DeepLake.force_delete_by_path(path)
     assert not deeplake.exists(path)
@@ -5,7 +5,6 @@ import uuid
 from typing import Generator, List, Union
 
 import pytest
-from elasticsearch import Elasticsearch
 
 from langchain.docstore.document import Document
 from langchain.embeddings import OpenAIEmbeddings
@@ -29,6 +28,8 @@ class TestElasticsearch:
     @pytest.fixture(scope="class", autouse=True)
     def elasticsearch_url(self) -> Union[str, Generator[str, None, None]]:
         """Return the elasticsearch url."""
+        from elasticsearch import Elasticsearch
+
         url = "http://localhost:9200"
         yield url
         es = Elasticsearch(hosts=url)
@@ -108,6 +109,7 @@ class TestElasticsearch:
     ) -> None:
         """This test checks the construction of a custom
         ElasticSearch index using the 'from_documents'."""
+        from elasticsearch import Elasticsearch
 
         index_name = f"custom_index_{uuid.uuid4().hex}"
         elastic_vector_search = ElasticVectorSearch.from_documents(
@@ -134,6 +136,7 @@ class TestElasticsearch:
     ) -> None:
         """This test checks the construction of a custom
         ElasticSearch index using the 'add_documents'."""
+        from elasticsearch import Elasticsearch
 
         index_name = f"custom_index_{uuid.uuid4().hex}"
         elastic_vector_search = ElasticVectorSearch(
@@ -1,10 +1,10 @@
-import lancedb
-
 from langchain.vectorstores import LanceDB
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 
 
 def test_lancedb() -> None:
+    import lancedb
+
     embeddings = FakeEmbeddings()
     db = lancedb.connect("/tmp/lancedb")
     texts = ["text 1", "text 2", "item 3"]
@@ -24,6 +24,8 @@ def test_lancedb() -> None:
 
 
 def test_lancedb_add_texts() -> None:
+    import lancedb
+
     embeddings = FakeEmbeddings()
     db = lancedb.connect("/tmp/lancedb")
     texts = ["text 1"]
@@ -1,7 +1,6 @@
 """Test Marqo functionality."""
 from typing import Dict
 
-import marqo
 import pytest
 
 from langchain.docstore.document import Document
@@ -14,6 +13,8 @@ INDEX_NAME = "langchain-integration-tests"
 
 @pytest.fixture
 def client() -> Marqo:
+    import marqo
+
     # fixture for marqo client to be used throughout testing, resets the index
     client = marqo.Client(url=DEFAULT_MARQO_URL, api_key=DEFAULT_MARQO_API_KEY)
     try:
@@ -128,6 +129,8 @@ def test_marqo_weighted_query(client: Marqo) -> None:
 
 
 def test_marqo_multimodal() -> None:
+    import marqo
+
     client = marqo.Client(url=DEFAULT_MARQO_URL, api_key=DEFAULT_MARQO_API_KEY)
     try:
         client.index(INDEX_NAME).delete()
@@ -1,7 +1,6 @@
 """Test Meilisearch functionality."""
-from typing import Generator
+from typing import TYPE_CHECKING, Generator
 
-import meilisearch
 import pytest
 import requests
 
@@ -9,6 +8,9 @@ from langchain.docstore.document import Document
 from langchain.vectorstores import Meilisearch
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 
+if TYPE_CHECKING:
+    import meilisearch
+
 INDEX_NAME = "test-langchain-demo"
 TEST_MEILI_HTTP_ADDR = "http://localhost:7700"
 TEST_MEILI_MASTER_KEY = "masterKey"
@@ -49,7 +51,9 @@ class TestMeilisearchVectorSearch:
         task = client.index(index.uid).delete()
         client.wait_for_task(task.task_uid)
 
-    def client(self) -> meilisearch.Client:
+    def client(self) -> "meilisearch.Client":
+        import meilisearch
+
         return meilisearch.Client(TEST_MEILI_HTTP_ADDR, TEST_MEILI_MASTER_KEY)
 
     def _wait_last_task(self) -> None:
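The `if TYPE_CHECKING:` block plus the quoted annotation keeps meilisearch visible to static type checkers while the runtime import happens inside the method. The shape of the pattern, sketched with a hypothetical helper:

from typing import TYPE_CHECKING

if TYPE_CHECKING:  # evaluated by type checkers only, never at runtime
    import meilisearch


def make_client(addr: str, key: str) -> "meilisearch.Client":
    import meilisearch  # deferred until the function is actually called

    return meilisearch.Client(addr, key)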
@@ -3,7 +3,7 @@ from __future__ import annotations
 
 import os
 from time import sleep
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 import pytest
 
@@ -21,23 +21,27 @@ DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
 
 # Instantiate as constant instead of pytest fixture to prevent needing to make multiple
 # connections.
-TEST_CLIENT: MongoClient = MongoClient(CONNECTION_STRING)
-collection = TEST_CLIENT[DB_NAME][COLLECTION_NAME]
+@pytest.fixture
+def collection() -> Any:
+    test_client = MongoClient(CONNECTION_STRING)
+    return test_client[DB_NAME][COLLECTION_NAME]
 
 
 class TestMongoDBAtlasVectorSearch:
     @classmethod
-    def setup_class(cls) -> None:
+    def setup_class(cls, collection: Any) -> None:
         # insure the test collection is empty
         assert collection.count_documents({}) == 0  # type: ignore[index]  # noqa: E501
 
     @classmethod
-    def teardown_class(cls) -> None:
+    def teardown_class(cls, collection: Any) -> None:
         # delete all the documents in the collection
         collection.delete_many({})  # type: ignore[index]
 
     @pytest.fixture(autouse=True)
-    def setup(self) -> None:
+    def setup(self, collection: Any) -> None:
         # delete all the documents in the collection
         collection.delete_many({})  # type: ignore[index]
 
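Replacing the module-level MongoClient constant with a fixture means importing the module no longer opens a network connection. One caveat: pytest does not inject fixtures into xunit-style `setup_class`/`teardown_class` classmethods, so the signatures above would likely need a class-scoped autouse fixture instead. A sketch of that alternative, with placeholder names and URI:

from typing import Any, Generator

import pytest
from pymongo import MongoClient

CONNECTION_STRING = "mongodb://localhost:27017"  # placeholder


@pytest.fixture(scope="class")
def collection() -> Any:
    return MongoClient(CONNECTION_STRING)["db_name"]["collection_name"]


@pytest.fixture(scope="class", autouse=True)
def clean_collection(collection: Any) -> Generator[None, None, None]:
    collection.delete_many({})  # start the class with an empty collection
    yield
    collection.delete_many({})  # clean up afterwards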
@@ -1,8 +1,6 @@
 """Test OpenSearch functionality."""
 
-import boto3
 import pytest
-from opensearchpy import AWSV4SignerAuth
 
 from langchain.docstore.document import Document
 from langchain.vectorstores.opensearch_vector_search import (
@@ -219,6 +217,9 @@ def test_opensearch_with_custom_field_name_appx_false() -> None:
 
 def test_opensearch_serverless_with_scripting_search_indexing_throws_error() -> None:
     """Test to validate indexing using Serverless without Approximate Search."""
+    import boto3
+    from opensearchpy import AWSV4SignerAuth
+
     region = "test-region"
     service = "aoss"
     credentials = boto3.Session().get_credentials()
@@ -235,6 +236,9 @@ def test_opensearch_serverless_with_scripting_search_indexing_throws_error() ->
 
 def test_opensearch_serverless_with_lucene_engine_throws_error() -> None:
     """Test to validate indexing using lucene engine with Serverless."""
+    import boto3
+    from opensearchpy import AWSV4SignerAuth
+
     region = "test-region"
     service = "aoss"
     credentials = boto3.Session().get_credentials()
@@ -2,16 +2,18 @@ import importlib
 import os
 import time
 import uuid
-from typing import List
+from typing import TYPE_CHECKING, List
 
 import numpy as np
-import pinecone
 import pytest
 
 from langchain.docstore.document import Document
 from langchain.embeddings import OpenAIEmbeddings
 from langchain.vectorstores.pinecone import Pinecone
 
+if TYPE_CHECKING:
+    import pinecone
+
 index_name = "langchain-test-index"  # name of the index
 namespace_name = "langchain-test-namespace"  # name of the namespace
 dimension = 1536  # dimension of the embeddings
@@ -32,10 +34,12 @@ def reset_pinecone() -> None:
 
 
 class TestPinecone:
-    index: pinecone.Index
+    index: "pinecone.Index"
 
     @classmethod
     def setup_class(cls) -> None:
+        import pinecone
+
         reset_pinecone()
 
         cls.index = pinecone.Index(index_name)
@@ -1,9 +1,6 @@
 import logging
 import os
 
-import rockset
-import rockset.models
-
 from langchain.docstore.document import Document
 from langchain.vectorstores.rocksetdb import Rockset
 from tests.integration_tests.vectorstores.fake_embeddings import (
@@ -44,6 +41,9 @@ class TestRockset:
 
     @classmethod
     def setup_class(cls) -> None:
+        import rockset
+        import rockset.models
+
         assert os.environ.get("ROCKSET_API_KEY") is not None
         assert os.environ.get("ROCKSET_REGION") is not None
 
@@ -5,7 +5,6 @@ import uuid
 from typing import Generator, Union
 
 import pytest
-from weaviate import Client
 
 from langchain.docstore.document import Document
 from langchain.embeddings.openai import OpenAIEmbeddings
@@ -29,6 +28,8 @@ class TestWeaviate:
     @pytest.fixture(scope="class", autouse=True)
     def weaviate_url(self) -> Union[str, Generator[str, None, None]]:
         """Return the weaviate url."""
+        from weaviate import Client
+
         url = "http://localhost:8080"
         yield url
 