mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-03 13:43:24 +00:00
Using `pytest-vcr` in integration tests has several benefits. Firstly, it removes the need to mock external services, as VCR records and replays HTTP interactions on the fly. Secondly, it simplifies the integration test setup by eliminating the need to set up and tear down external services in some cases. Finally, it allows for more reliable and deterministic integration tests by ensuring that HTTP interactions are always replayed with the same response. Overall, `pytest-vcr` is a valuable tool for simplifying integration test setup and improving its reliability. This commit adds the `pytest-vcr` package as a dependency for integration tests in the `pyproject.toml` file. It also introduces two new fixtures in `tests/integration_tests/conftest.py` files for managing cassette directories and VCR configurations. In addition, the `tests/integration_tests/vectorstores/test_elasticsearch.py` file has been updated to use the `@pytest.mark.vcr` decorator for recording and replaying HTTP interactions. Finally, this commit removes the `documents` fixture from the `test_elasticsearch.py` file and replaces it with a new fixture defined in `tests/integration_tests/vectorstores/conftest.py` that yields a list of documents for use in any other tests. This also includes my second attempt to fix issue https://github.com/hwchase17/langchain/issues/2386. Maybe related: https://github.com/hwchase17/langchain/issues/2484
151 lines
5.6 KiB
Python
151 lines
5.6 KiB
Python
"""Test ElasticSearch functionality."""
|
|
import logging
|
|
import os
|
|
import uuid
|
|
from typing import Generator, List, Union
|
|
|
|
import pytest
|
|
from elasticsearch import Elasticsearch
|
|
|
|
from langchain.docstore.document import Document
|
|
from langchain.document_loaders import TextLoader
|
|
from langchain.embeddings import OpenAIEmbeddings
|
|
from langchain.text_splitter import CharacterTextSplitter
|
|
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
|
|
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
|
|
|
# Verbose logging makes the Elasticsearch client's HTTP traffic visible,
# which helps when debugging failing integration tests.
logging.basicConfig(level=logging.DEBUG)

# These tests require a local Elasticsearch instance; start one with:
"""
cd tests/integration_tests/vectorstores/docker-compose
docker-compose -f elasticsearch.yml up
"""
class TestElasticsearch:
    """Integration tests for ``ElasticVectorSearch``.

    Requires a local Elasticsearch instance (see the docker-compose note at
    module level).  The OpenAI-backed tests additionally require the
    ``OPENAI_API_KEY`` environment variable and use VCR cassettes via
    ``@pytest.mark.vcr``.
    """

    @pytest.fixture(scope="class", autouse=True)
    def elasticsearch_url(self) -> Generator[str, None, None]:
        """Yield the Elasticsearch URL, then wipe all indexes on teardown.

        Yields:
            The URL of the local Elasticsearch instance.
        """
        url = "http://localhost:9200"
        yield url

        # Teardown: delete every index so repeated runs of this test class
        # start from a clean cluster.
        es = Elasticsearch(hosts=url)
        index_names = es.indices.get(index="_all").keys()
        for index_name in index_names:
            es.indices.delete(index=index_name)

    @pytest.fixture(scope="class", autouse=True)
    def openai_api_key(self) -> Generator[str, None, None]:
        """Yield the OpenAI API key read from the environment.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is not set.

        NOTE(review): because this fixture is ``autouse``, a missing key
        fails even the FakeEmbeddings tests that never touch OpenAI —
        consider ``pytest.skip`` or dropping ``autouse`` instead.
        """
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            raise ValueError("OPENAI_API_KEY environment variable is not set")

        yield openai_api_key

    @pytest.fixture(scope="class")
    def documents(self) -> Generator[List[Document], None, None]:
        """Yield the sharks fixture file split into chunks of <= 1000 chars."""
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

        documents = TextLoader(
            os.path.join(os.path.dirname(__file__), "fixtures", "sharks.txt")
        ).load()
        yield text_splitter.split_documents(documents)

    def test_similarity_search_without_metadata(self, elasticsearch_url: str) -> None:
        """Test end to end construction and search without metadata."""
        texts = ["foo", "bar", "baz"]
        docsearch = ElasticVectorSearch.from_texts(
            texts, FakeEmbeddings(), elasticsearch_url=elasticsearch_url
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo")]

    def test_similarity_search_with_metadata(self, elasticsearch_url: str) -> None:
        """Test end to end construction and search with metadata."""
        texts = ["foo", "bar", "baz"]
        metadatas = [{"page": i} for i in range(len(texts))]
        docsearch = ElasticVectorSearch.from_texts(
            texts,
            FakeEmbeddings(),
            metadatas=metadatas,
            elasticsearch_url=elasticsearch_url,
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo", metadata={"page": 0})]

    @pytest.mark.vcr(ignore_localhost=True)
    def test_default_index_from_documents(
        self, documents: List[Document], openai_api_key: str, elasticsearch_url: str
    ) -> None:
        """Check construction of a default index via ``from_documents``."""
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)

        elastic_vector_search = ElasticVectorSearch.from_documents(
            documents=documents,
            embedding=embedding,
            elasticsearch_url=elasticsearch_url,
        )

        search_result = elastic_vector_search.similarity_search("sharks")

        assert len(search_result) != 0

    @pytest.mark.vcr(ignore_localhost=True)
    def test_custom_index_from_documents(
        self, documents: List[Document], openai_api_key: str, elasticsearch_url: str
    ) -> None:
        """Check construction of a custom-named index via ``from_documents``."""
        # A unique suffix keeps reruns from colliding with leftover indexes.
        index_name = f"custom_index_{uuid.uuid4().hex}"
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
        elastic_vector_search = ElasticVectorSearch.from_documents(
            documents=documents,
            embedding=embedding,
            elasticsearch_url=elasticsearch_url,
            index_name=index_name,
        )
        # Verify the index was actually created under the requested name.
        es = Elasticsearch(hosts=elasticsearch_url)
        index_names = es.indices.get(index="_all").keys()
        assert index_name in index_names

        search_result = elastic_vector_search.similarity_search("sharks")

        assert len(search_result) != 0

    @pytest.mark.vcr(ignore_localhost=True)
    def test_custom_index_add_documents(
        self, documents: List[Document], openai_api_key: str, elasticsearch_url: str
    ) -> None:
        """Check construction of a custom-named index via ``add_documents``."""
        # A unique suffix keeps reruns from colliding with leftover indexes.
        index_name = f"custom_index_{uuid.uuid4().hex}"
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
        elastic_vector_search = ElasticVectorSearch(
            embedding=embedding,
            elasticsearch_url=elasticsearch_url,
            index_name=index_name,
        )
        es = Elasticsearch(hosts=elasticsearch_url)
        elastic_vector_search.add_documents(documents)

        # Verify the index was actually created under the requested name.
        index_names = es.indices.get(index="_all").keys()
        assert index_name in index_names

        search_result = elastic_vector_search.similarity_search("sharks")

        assert len(search_result) != 0

    def test_custom_index_add_documents_to_exists_store(self) -> None:
        # TODO: implement adding documents to a pre-existing store.
        pass