Elasticsearch Query Retriever: Use match + fuzziness for LIKE (#12023)

Updated the Elasticsearch self-query retriever to use the `match` clause
(with fuzziness) for the LIKE operator instead of the non-analyzed `fuzzy` search clause.

Other small updates include:
- fixing the stack inference integration test, where the index's default
pipeline setting did not point to the inference pipeline the test creates
- adding a user-agent to the old implementation to track usage
- improving the documentation for ElasticsearchStore filters
This commit is contained in:
Joe McElroy
2023-10-19 14:47:21 +01:00
committed by GitHub
parent 84d250f781
commit c9f1768cb9
6 changed files with 94 additions and 78 deletions

View File

@@ -39,7 +39,7 @@ class ElasticsearchTranslator(Visitor):
Comparator.LT: "lt",
Comparator.LTE: "lte",
Comparator.CONTAIN: "match",
Comparator.LIKE: "fuzzy",
Comparator.LIKE: "match",
}
return map_dict[func]
@@ -67,15 +67,19 @@ class ElasticsearchTranslator(Visitor):
}
}
if comparison.comparator == Comparator.LIKE:
if comparison.comparator == Comparator.CONTAIN:
return {
self._format_func(comparison.comparator): {
field: {"value": comparison.value, "fuzziness": "AUTO"}
field: {"query": comparison.value}
}
}
if comparison.comparator == Comparator.CONTAIN:
return {self._format_func(comparison.comparator): {field: comparison.value}}
if comparison.comparator == Comparator.LIKE:
return {
self._format_func(comparison.comparator): {
field: {"query": comparison.value, "fuzziness": "AUTO"}
}
}
# we assume that if the value is a string,
# we want to use the keyword field

View File

@@ -156,12 +156,22 @@ class ElasticVectorSearch(VectorStore):
self.index_name = index_name
_ssl_verify = ssl_verify or {}
try:
self.client = elasticsearch.Elasticsearch(elasticsearch_url, **_ssl_verify)
self.client = elasticsearch.Elasticsearch(
elasticsearch_url,
**_ssl_verify,
headers={"user-agent": self.get_user_agent()},
)
except ValueError as e:
raise ValueError(
f"Your elasticsearch client string is mis-formatted. Got error: {e} "
)
# Builds the user-agent string that the constructor hunk above passes to the
# Elasticsearch client via `headers={"user-agent": ...}`.
@staticmethod
def get_user_agent() -> str:
# Function-scope import. NOTE(review): presumably done to avoid an import
# cycle with the top-level package at module load time -- confirm.
from langchain import __version__
# Fixed "langchain-py-dvs" tag followed by the installed langchain version.
return f"langchain-py-dvs/{__version__}"
@property
def embeddings(self) -> Embeddings:
# Read-only accessor that returns the instance's `embedding` attribute.
return self.embedding

View File

@@ -531,7 +531,7 @@ class TestElasticsearch:
},
}
},
settings={"index": {"default_pipeline": "pipeline"}},
settings={"index": {"default_pipeline": "test_pipeline"}},
)
# adding documents to the index

View File

@@ -49,14 +49,14 @@ def test_visit_comparison_range_lte() -> None:
# Checks how a CONTAIN comparison on attribute "foo" is translated into an
# Elasticsearch `match` clause on the "metadata.foo" field.
def test_visit_comparison_range_match() -> None:
comp = Comparison(comparator=Comparator.CONTAIN, attribute="foo", value="1")
# NOTE(review): the diff +/- markers were lost in this rendering. The first
# `expected` line appears to be the removed (pre-change) expectation, where
# the field maps directly to the bare value.
expected = {"match": {"metadata.foo": "1"}}
# The second `expected` line appears to be the added (post-change)
# expectation: the value is nested under a "query" key, matching the
# translator hunk that now emits {"query": comparison.value}.
expected = {"match": {"metadata.foo": {"query": "1"}}}
actual = DEFAULT_TRANSLATOR.visit_comparison(comp)
assert expected == actual
# Checks how a LIKE comparison on attribute "foo" is translated into an
# Elasticsearch query on the "metadata.foo" field.
def test_visit_comparison_range_like() -> None:
comp = Comparison(comparator=Comparator.LIKE, attribute="foo", value="bar")
# NOTE(review): the diff +/- markers were lost in this rendering. The first
# `expected` line appears to be the removed (pre-change) expectation: a
# `fuzzy` clause keyed by "value".
expected = {"fuzzy": {"metadata.foo": {"value": "bar", "fuzziness": "AUTO"}}}
# The second `expected` line appears to be the added (post-change)
# expectation: a `match` clause keyed by "query", still with
# "fuzziness": "AUTO", as described in the commit message.
expected = {"match": {"metadata.foo": {"query": "bar", "fuzziness": "AUTO"}}}
actual = DEFAULT_TRANSLATOR.visit_comparison(comp)
assert expected == actual
@@ -200,9 +200,9 @@ def test_visit_structured_query_complex() -> None:
"should": [
{"range": {"metadata.bar": {"lt": 1}}},
{
"fuzzy": {
"match": {
"metadata.bar": {
"value": "10",
"query": "10",
"fuzziness": "AUTO",
}
}