langchain/tests/integration_tests/vectorstores/test_elasticsearch.py
sergerdn 6dc86ad48f
feat: add pytest-vcr for recording HTTP interactions in integration tests (#2445)
Using `pytest-vcr` in integration tests has several benefits. Firstly,
it removes the need to mock external services, as VCR records and
replays HTTP interactions on the fly. Secondly, it simplifies the
integration test setup by eliminating the need to set up and tear down
external services in some cases. Finally, it allows for more reliable
and deterministic integration tests by ensuring that HTTP interactions
are always replayed with the same response.
Overall, `pytest-vcr` is a valuable tool for simplifying integration
test setup and improving their reliability.

This commit adds the `pytest-vcr` package as a dependency for
integration tests in the `pyproject.toml` file. It also introduces two
new fixtures in `tests/integration_tests/conftest.py` files for managing
cassette directories and VCR configurations.

In addition, the
`tests/integration_tests/vectorstores/test_elasticsearch.py` file has
been updated to use the `@pytest.mark.vcr` decorator for recording and
replaying HTTP interactions.

Finally, this commit removes the `documents` fixture from the
`test_elasticsearch.py` file and replaces it with a new fixture defined
in `tests/integration_tests/vectorstores/conftest.py` that yields a list
of documents to use in any other tests.

This also includes my second attempt to fix issue:
https://github.com/hwchase17/langchain/issues/2386

Maybe related https://github.com/hwchase17/langchain/issues/2484
2023-04-07 07:28:57 -07:00

151 lines
5.6 KiB
Python

"""Test ElasticSearch functionality."""
import logging
import os
import uuid
from typing import Generator, List, Union
import pytest
from elasticsearch import Elasticsearch
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
# Verbose logging makes recorded/replayed HTTP interactions easier to debug.
logging.basicConfig(level=logging.DEBUG)

# The bare string below is an intentional, inert "how to run" hint: it shows
# how to start the local Elasticsearch these tests talk to.
"""
cd tests/integration_tests/vectorstores/docker-compose
docker-compose -f elasticsearch.yml up
"""
class TestElasticsearch:
    """Integration tests for ``ElasticVectorSearch``.

    Requires a live Elasticsearch at ``http://localhost:9200`` (see the
    docker-compose hint at module level).  The tests marked with
    ``@pytest.mark.vcr`` record/replay their OpenAI HTTP traffic and also
    need the ``OPENAI_API_KEY`` environment variable to be set.
    """

    @pytest.fixture(scope="class", autouse=True)
    def elasticsearch_url(self) -> Generator[str, None, None]:
        """Yield the Elasticsearch URL; on teardown, delete all indices.

        ``autouse=True`` with class scope means the cleanup runs once after
        the whole class, so test runs don't leak indices into each other.
        """
        url = "http://localhost:9200"
        yield url
        # Teardown: connect and drop every index created during the run.
        es = Elasticsearch(hosts=url)
        # Clear all indexes
        index_names = es.indices.get(index="_all").keys()
        for index_name in index_names:
            es.indices.delete(index=index_name)

    @pytest.fixture(scope="class", autouse=True)
    def openai_api_key(self) -> Generator[str, None, None]:
        """Yield the OpenAI API key, failing fast if it is not configured."""
        openai_api_key = os.getenv("OPENAI_API_KEY")  # may be None
        if not openai_api_key:
            raise ValueError("OPENAI_API_KEY environment variable is not set")
        yield openai_api_key

    @pytest.fixture(scope="class")
    def documents(self) -> Generator[List[Document], None, None]:
        """Yield the ``sharks.txt`` fixture split into 1000-char chunks."""
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        documents = TextLoader(
            os.path.join(os.path.dirname(__file__), "fixtures", "sharks.txt")
        ).load()
        yield text_splitter.split_documents(documents)

    def test_similarity_search_without_metadata(self, elasticsearch_url: str) -> None:
        """Test end to end construction and search without metadata."""
        texts = ["foo", "bar", "baz"]
        docsearch = ElasticVectorSearch.from_texts(
            texts, FakeEmbeddings(), elasticsearch_url=elasticsearch_url
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo")]

    def test_similarity_search_with_metadata(self, elasticsearch_url: str) -> None:
        """Test end to end construction and search with metadata."""
        texts = ["foo", "bar", "baz"]
        metadatas = [{"page": i} for i in range(len(texts))]
        docsearch = ElasticVectorSearch.from_texts(
            texts,
            FakeEmbeddings(),
            metadatas=metadatas,
            elasticsearch_url=elasticsearch_url,
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo", metadata={"page": 0})]

    # ignore_localhost=True: VCR records only the OpenAI traffic; requests to
    # the local Elasticsearch are passed through, not recorded.
    @pytest.mark.vcr(ignore_localhost=True)
    def test_default_index_from_documents(
        self, documents: List[Document], openai_api_key: str, elasticsearch_url: str
    ) -> None:
        """This test checks the construction of a default
        ElasticSearch index using the 'from_documents'."""
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
        elastic_vector_search = ElasticVectorSearch.from_documents(
            documents=documents,
            embedding=embedding,
            elasticsearch_url=elasticsearch_url,
        )
        search_result = elastic_vector_search.similarity_search("sharks")
        print(search_result)
        # Only assert non-emptiness: embedding similarity ordering is not
        # deterministic enough to pin exact documents.
        assert len(search_result) != 0

    @pytest.mark.vcr(ignore_localhost=True)
    def test_custom_index_from_documents(
        self, documents: List[Document], openai_api_key: str, elasticsearch_url: str
    ) -> None:
        """This test checks the construction of a custom
        ElasticSearch index using the 'from_documents'."""
        # Unique name per run so the assertion below can't hit a stale index.
        index_name = f"custom_index_{uuid.uuid4().hex}"
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
        elastic_vector_search = ElasticVectorSearch.from_documents(
            documents=documents,
            embedding=embedding,
            elasticsearch_url=elasticsearch_url,
            index_name=index_name,
        )
        # Verify the named index was actually created server-side.
        es = Elasticsearch(hosts=elasticsearch_url)
        index_names = es.indices.get(index="_all").keys()
        assert index_name in index_names

        search_result = elastic_vector_search.similarity_search("sharks")
        print(search_result)
        assert len(search_result) != 0

    @pytest.mark.vcr(ignore_localhost=True)
    def test_custom_index_add_documents(
        self, documents: List[Document], openai_api_key: str, elasticsearch_url: str
    ) -> None:
        """This test checks the construction of a custom
        ElasticSearch index using the 'add_documents'."""
        # Unique name per run so the assertion below can't hit a stale index.
        index_name = f"custom_index_{uuid.uuid4().hex}"
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
        elastic_vector_search = ElasticVectorSearch(
            embedding=embedding,
            elasticsearch_url=elasticsearch_url,
            index_name=index_name,
        )
        es = Elasticsearch(hosts=elasticsearch_url)
        elastic_vector_search.add_documents(documents)

        index_names = es.indices.get(index="_all").keys()
        assert index_name in index_names

        search_result = elastic_vector_search.similarity_search("sharks")
        print(search_result)
        assert len(search_result) != 0

    def test_custom_index_add_documents_to_exists_store(self) -> None:
        # TODO: implement it — should cover add_documents() against an
        # index that already holds documents from a previous store instance.
        pass