Mirror of https://github.com/hwchase17/langchain.git, synced 2025-10-23 19:44:05 +00:00
fix: elasticsearch (#2402)
- Create a new docker-compose file to start an Elasticsearch instance for integration tests.
- Add new tests to `test_elasticsearch.py` to verify Elasticsearch functionality.
- Include an optional group `test_integration` in the `pyproject.toml` file. This group contains the dependencies for integration tests and can be installed with `poetry install --with test_integration`. Any new dependencies should be added by running `poetry add some_new_deps --group "test_integration"`.

Note: the new tests run in live mode, which involves end-to-end calls to the OpenAI API. In the future, adding `pytest-vcr` to record and replay all API requests would be a nice addition to the testing process. More info: https://pytest-vcr.readthedocs.io/en/latest/

Fixes https://github.com/hwchase17/langchain/issues/2386
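The compose file itself isn't reproduced on this page; below is a minimal sketch of what `tests/integration_tests/vectorstores/docker-compose/elasticsearch.yml` could look like. The image tag, single-node discovery, and disabled security are assumptions for illustration, not the committed contents.

```yaml
# Hypothetical sketch of elasticsearch.yml -- not the committed file.
version: "3"
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.7.0  # tag is an assumption
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false  # no auth, so tests can hit http://localhost:9200
    ports:
      - "9200:9200"
```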
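Likewise, the `pyproject.toml` section isn't shown here; under Poetry 1.2+, an optional group of roughly this shape would match the commands above. The package list is illustrative, not the committed one.

```toml
# Hypothetical sketch of the optional group -- exact contents not shown in this commit.
[tool.poetry.group.test_integration]
optional = true

[tool.poetry.group.test_integration.dependencies]
elasticsearch = "^8.7.0"
openai = "^0.27.0"
```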
@@ -1,29 +1,137 @@
"""Test ElasticSearch functionality."""
import logging
import os
from typing import Generator, List, Union

import pytest
from elasticsearch import Elasticsearch

from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

logging.basicConfig(level=logging.DEBUG)

def test_elasticsearch() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = ElasticVectorSearch.from_texts(
        texts, FakeEmbeddings(), elasticsearch_url="http://localhost:9200"
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]

"""
|
||||
cd tests/integration_tests/vectorstores/docker-compose
|
||||
docker-compose -f elasticsearch.yml up
|
||||
"""
|
||||
|
||||
|
||||
def test_elasticsearch_with_metadatas() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = ElasticVectorSearch.from_texts(
        texts,
        FakeEmbeddings(),
        metadatas=metadatas,
        elasticsearch_url="http://localhost:9200",
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo", metadata={"page": 0})]


class TestElasticsearch:
    @pytest.fixture(scope="class", autouse=True)
    def elasticsearch_url(self) -> Union[str, Generator[str, None, None]]:
        """Yield the Elasticsearch URL, then clear all indexes on teardown."""
        url = "http://localhost:9200"
        yield url
        es = Elasticsearch(hosts=url)

        # Clear all indexes
        index_names = es.indices.get(index="_all").keys()
        for index_name in index_names:
            es.indices.delete(index=index_name)

    @pytest.fixture(scope="class", autouse=True)
    def openai_api_key(self) -> Union[str, Generator[str, None, None]]:
        """Yield the OpenAI API key from the environment."""
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            raise ValueError("OPENAI_API_KEY environment variable is not set")

        yield openai_api_key

    @pytest.fixture(scope="class")
    def documents(self) -> Generator[List[Document], None, None]:
        """Yield a list of documents split from the sharks.txt fixture."""
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

        documents = TextLoader(
            os.path.join(os.path.dirname(__file__), "fixtures", "sharks.txt")
        ).load()
        yield text_splitter.split_documents(documents)

    def test_similarity_search_without_metadata(self, elasticsearch_url: str) -> None:
        """Test end to end construction and search without metadata."""
        texts = ["foo", "bar", "baz"]
        docsearch = ElasticVectorSearch.from_texts(
            texts, FakeEmbeddings(), elasticsearch_url=elasticsearch_url
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo")]

    def test_similarity_search_with_metadata(self, elasticsearch_url: str) -> None:
        """Test end to end construction and search with metadata."""
        texts = ["foo", "bar", "baz"]
        metadatas = [{"page": i} for i in range(len(texts))]
        docsearch = ElasticVectorSearch.from_texts(
            texts,
            FakeEmbeddings(),
            metadatas=metadatas,
            elasticsearch_url=elasticsearch_url,
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo", metadata={"page": 0})]

    def test_default_index_from_documents(
        self, documents: List[Document], openai_api_key: str, elasticsearch_url: str
    ) -> None:
        """Test construction of a default ElasticSearch index via 'from_documents'."""
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)

        elastic_vector_search = ElasticVectorSearch.from_documents(
            documents=documents,
            embedding=embedding,
            elasticsearch_url=elasticsearch_url,
        )

        search_result = elastic_vector_search.similarity_search("sharks")

        print(search_result)
        assert len(search_result) != 0

    def test_custom_index_from_documents(
        self, documents: List[Document], openai_api_key: str, elasticsearch_url: str
    ) -> None:
        """Test construction of a custom ElasticSearch index via 'from_documents'."""
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
        elastic_vector_search = ElasticVectorSearch.from_documents(
            documents=documents,
            embedding=embedding,
            elasticsearch_url=elasticsearch_url,
            index_name="custom_index",
        )
        es = Elasticsearch(hosts=elasticsearch_url)
        index_names = es.indices.get(index="_all").keys()
        assert "custom_index" in index_names

        search_result = elastic_vector_search.similarity_search("sharks")
        print(search_result)

        assert len(search_result) != 0

    def test_custom_index_add_documents(
        self, documents: List[Document], openai_api_key: str, elasticsearch_url: str
    ) -> None:
        """Test construction of a custom ElasticSearch index via 'add_documents'."""
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
        elastic_vector_search = ElasticVectorSearch(
            embedding=embedding,
            elasticsearch_url=elasticsearch_url,
            index_name="custom_index",
        )
        es = Elasticsearch(hosts=elasticsearch_url)
        index_names = es.indices.get(index="_all").keys()
        assert "custom_index" in index_names

        elastic_vector_search.add_documents(documents)
        search_result = elastic_vector_search.similarity_search("sharks")
        print(search_result)

        assert len(search_result) != 0
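On the `pytest-vcr` idea from the commit message, a hypothetical sketch of how one of these live tests could be recorded and replayed; nothing below is part of this commit.

```python
# Hypothetical sketch: pytest-vcr is NOT wired up in this commit.
# pytest-vcr exposes a `vcr` marker; on the first run the test's HTTP
# traffic is recorded to a cassette file, and later runs replay it,
# so the OpenAI API is only hit once.
import pytest


@pytest.mark.vcr
def test_default_index_from_documents_recorded() -> None:
    """Same body as test_default_index_from_documents, but recorded."""
    ...
```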