mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-01 19:12:42 +00:00
ElasticsearchStore: improve error logging for adding documents (#9648)
It is not obvious what the error is when documents cannot be indexed. This PR adds the ability to log the first error's reason, to help the user diagnose the issue. It also adds some more documentation for when you want to use the vector store with an embedding model deployed in Elasticsearch. Credit: @elastic and @phoey1
This commit is contained in:
@@ -710,7 +710,7 @@ class ElasticsearchStore(VectorStore):
|
||||
after deleting documents. Defaults to True.
|
||||
"""
|
||||
try:
|
||||
from elasticsearch.helpers import bulk
|
||||
from elasticsearch.helpers import BulkIndexError, bulk
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import elasticsearch python package. "
|
||||
@@ -731,8 +731,10 @@ class ElasticsearchStore(VectorStore):
|
||||
logger.debug(f"Deleted {len(body)} texts from index")
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
except BulkIndexError as e:
|
||||
logger.error(f"Error deleting texts: {e}")
|
||||
firstError = e.errors[0].get("index", {}).get("error", {})
|
||||
logger.error(f"First error reason: {firstError.get('reason')}")
|
||||
raise e
|
||||
|
||||
else:
|
||||
@@ -801,7 +803,7 @@ class ElasticsearchStore(VectorStore):
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
try:
|
||||
from elasticsearch.helpers import bulk
|
||||
from elasticsearch.helpers import BulkIndexError, bulk
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import elasticsearch python package. "
|
||||
@@ -867,8 +869,10 @@ class ElasticsearchStore(VectorStore):
|
||||
|
||||
logger.debug(f"added texts {ids} to index")
|
||||
return ids
|
||||
except Exception as e:
|
||||
except BulkIndexError as e:
|
||||
logger.error(f"Error adding texts: {e}")
|
||||
firstError = e.errors[0].get("index", {}).get("error", {})
|
||||
logger.error(f"First error reason: {firstError.get('reason')}")
|
||||
raise e
|
||||
|
||||
else:
|
||||
|
@@ -5,6 +5,7 @@ import uuid
|
||||
from typing import Generator, List, Union
|
||||
|
||||
import pytest
|
||||
from elasticsearch.helpers import BulkIndexError
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.vectorstores.elasticsearch import ElasticsearchStore
|
||||
@@ -480,6 +481,8 @@ class TestElasticsearch:
|
||||
document={"text_field": text, "metadata": {}},
|
||||
)
|
||||
|
||||
docsearch.client.indices.refresh(index=index_name)
|
||||
|
||||
def assert_query(query_body: dict, query: str) -> dict:
|
||||
assert query_body == {
|
||||
"knn": {
|
||||
@@ -574,3 +577,33 @@ class TestElasticsearch:
|
||||
docsearch.delete([ids[3]])
|
||||
output = docsearch.similarity_search("gni", k=10)
|
||||
assert len(output) == 0
|
||||
|
||||
def test_elasticsearch_indexing_exception_error(
|
||||
self,
|
||||
elasticsearch_connection: dict,
|
||||
index_name: str,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""Test bulk exception logging is giving better hints."""
|
||||
|
||||
docsearch = ElasticsearchStore(
|
||||
embedding=ConsistentFakeEmbeddings(),
|
||||
**elasticsearch_connection,
|
||||
index_name=index_name,
|
||||
)
|
||||
|
||||
docsearch.client.indices.create(
|
||||
index=index_name,
|
||||
mappings={"properties": {}},
|
||||
settings={"index": {"default_pipeline": "not-existing-pipeline"}},
|
||||
)
|
||||
|
||||
texts = ["foo"]
|
||||
|
||||
with pytest.raises(BulkIndexError):
|
||||
docsearch.add_texts(texts)
|
||||
|
||||
error_reason = "pipeline with id [not-existing-pipeline] does not exist"
|
||||
log_message = f"First error reason: {error_reason}"
|
||||
|
||||
assert log_message in caplog.text
|
||||
|
Reference in New Issue
Block a user