mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 21:47:12 +00:00
Elasticsearch Query Retriever: Use match + fuzziness for LIKE (#12023)
Updated the Elasticsearch self-query retriever to use the `match` clause (with fuzziness) for the LIKE operator instead of the non-analyzed `fuzzy` query clause. Other small updates include:
- fixing the stack inference integration test, where the index's default pipeline did not use the inference pipeline that the test created
- adding a user-agent to the old implementation to track usage
- improving the documentation for ElasticsearchStore filters
This commit is contained in:
@@ -39,7 +39,7 @@ class ElasticsearchTranslator(Visitor):
|
||||
Comparator.LT: "lt",
|
||||
Comparator.LTE: "lte",
|
||||
Comparator.CONTAIN: "match",
|
||||
Comparator.LIKE: "fuzzy",
|
||||
Comparator.LIKE: "match",
|
||||
}
|
||||
return map_dict[func]
|
||||
|
||||
@@ -67,15 +67,19 @@ class ElasticsearchTranslator(Visitor):
|
||||
}
|
||||
}
|
||||
|
||||
if comparison.comparator == Comparator.LIKE:
|
||||
if comparison.comparator == Comparator.CONTAIN:
|
||||
return {
|
||||
self._format_func(comparison.comparator): {
|
||||
field: {"value": comparison.value, "fuzziness": "AUTO"}
|
||||
field: {"query": comparison.value}
|
||||
}
|
||||
}
|
||||
|
||||
if comparison.comparator == Comparator.CONTAIN:
|
||||
return {self._format_func(comparison.comparator): {field: comparison.value}}
|
||||
if comparison.comparator == Comparator.LIKE:
|
||||
return {
|
||||
self._format_func(comparison.comparator): {
|
||||
field: {"query": comparison.value, "fuzziness": "AUTO"}
|
||||
}
|
||||
}
|
||||
|
||||
# we assume that if the value is a string,
|
||||
# we want to use the keyword field
|
||||
|
@@ -156,12 +156,22 @@ class ElasticVectorSearch(VectorStore):
|
||||
self.index_name = index_name
|
||||
_ssl_verify = ssl_verify or {}
|
||||
try:
|
||||
self.client = elasticsearch.Elasticsearch(elasticsearch_url, **_ssl_verify)
|
||||
self.client = elasticsearch.Elasticsearch(
|
||||
elasticsearch_url,
|
||||
**_ssl_verify,
|
||||
headers={"user-agent": self.get_user_agent()},
|
||||
)
|
||||
except ValueError as e:
|
||||
raise ValueError(
|
||||
f"Your elasticsearch client string is mis-formatted. Got error: {e} "
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get_user_agent() -> str:
|
||||
from langchain import __version__
|
||||
|
||||
return f"langchain-py-dvs/{__version__}"
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Embeddings:
|
||||
return self.embedding
|
||||
|
@@ -531,7 +531,7 @@ class TestElasticsearch:
|
||||
},
|
||||
}
|
||||
},
|
||||
settings={"index": {"default_pipeline": "pipeline"}},
|
||||
settings={"index": {"default_pipeline": "test_pipeline"}},
|
||||
)
|
||||
|
||||
# adding documents to the index
|
||||
|
@@ -49,14 +49,14 @@ def test_visit_comparison_range_lte() -> None:
|
||||
|
||||
def test_visit_comparison_range_match() -> None:
|
||||
comp = Comparison(comparator=Comparator.CONTAIN, attribute="foo", value="1")
|
||||
expected = {"match": {"metadata.foo": "1"}}
|
||||
expected = {"match": {"metadata.foo": {"query": "1"}}}
|
||||
actual = DEFAULT_TRANSLATOR.visit_comparison(comp)
|
||||
assert expected == actual
|
||||
|
||||
|
||||
def test_visit_comparison_range_like() -> None:
|
||||
comp = Comparison(comparator=Comparator.LIKE, attribute="foo", value="bar")
|
||||
expected = {"fuzzy": {"metadata.foo": {"value": "bar", "fuzziness": "AUTO"}}}
|
||||
expected = {"match": {"metadata.foo": {"query": "bar", "fuzziness": "AUTO"}}}
|
||||
actual = DEFAULT_TRANSLATOR.visit_comparison(comp)
|
||||
assert expected == actual
|
||||
|
||||
@@ -200,9 +200,9 @@ def test_visit_structured_query_complex() -> None:
|
||||
"should": [
|
||||
{"range": {"metadata.bar": {"lt": 1}}},
|
||||
{
|
||||
"fuzzy": {
|
||||
"match": {
|
||||
"metadata.bar": {
|
||||
"value": "10",
|
||||
"query": "10",
|
||||
"fuzziness": "AUTO",
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user