mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-21 18:39:57 +00:00
community[patch], langchain[minor]: Enhance Tencent Cloud VectorDB, langchain: make Tencent Cloud VectorDB self query retrieve compatible (#19651)
- Make Tencent Cloud VectorDB support metadata filtering. - Implement the delete function for Tencent Cloud VectorDB. - Support both LangChain embedding models and the Tencent Cloud VDB embedding model. - Tencent Cloud VectorDB now supports the filter search keyword, compatible with LangChain filtering syntax. - Add a Tencent Cloud VectorDB translation visitor, which now works with the self-query retriever. - More documentation. --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
@@ -18,6 +18,7 @@ from langchain_community.vectorstores import (
|
||||
Qdrant,
|
||||
Redis,
|
||||
SupabaseVectorStore,
|
||||
TencentVectorDB,
|
||||
TimescaleVector,
|
||||
Vectara,
|
||||
Weaviate,
|
||||
@@ -54,6 +55,7 @@ from langchain.retrievers.self_query.pinecone import PineconeTranslator
|
||||
from langchain.retrievers.self_query.qdrant import QdrantTranslator
|
||||
from langchain.retrievers.self_query.redis import RedisTranslator
|
||||
from langchain.retrievers.self_query.supabase import SupabaseVectorTranslator
|
||||
from langchain.retrievers.self_query.tencentvectordb import TencentVectorDBTranslator
|
||||
from langchain.retrievers.self_query.timescalevector import TimescaleVectorTranslator
|
||||
from langchain.retrievers.self_query.vectara import VectaraTranslator
|
||||
from langchain.retrievers.self_query.weaviate import WeaviateTranslator
|
||||
@@ -90,6 +92,11 @@ def _get_builtin_translator(vectorstore: VectorStore) -> Visitor:
|
||||
return MyScaleTranslator(metadata_key=vectorstore.metadata_column)
|
||||
elif isinstance(vectorstore, Redis):
|
||||
return RedisTranslator.from_vectorstore(vectorstore)
|
||||
elif isinstance(vectorstore, TencentVectorDB):
|
||||
fields = [
|
||||
field.name for field in (vectorstore.meta_fields or []) if field.index
|
||||
]
|
||||
return TencentVectorDBTranslator(fields)
|
||||
elif vectorstore.__class__ in BUILTIN_TRANSLATORS:
|
||||
return BUILTIN_TRANSLATORS[vectorstore.__class__]()
|
||||
else:
|
||||
|
@@ -0,0 +1,85 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Sequence, Tuple
|
||||
|
||||
from langchain.chains.query_constructor.ir import (
|
||||
Comparator,
|
||||
Comparison,
|
||||
Operation,
|
||||
Operator,
|
||||
StructuredQuery,
|
||||
Visitor,
|
||||
)
|
||||
|
||||
|
||||
class TencentVectorDBTranslator(Visitor):
    """Translate query-constructor IR nodes into a filter expression string.

    Comparisons are rendered as ``<attribute> <symbol> <value>`` and combined
    with ``and`` / ``or`` / ``not``. The finished expression is handed back by
    ``visit_structured_query`` under the ``"expr"`` keyword.
    """

    # Symbol emitted in the filter expression for each supported comparator.
    COMPARATOR_MAP = {
        Comparator.EQ: "=",
        Comparator.NE: "!=",
        Comparator.GT: ">",
        Comparator.GTE: ">=",
        Comparator.LT: "<",
        Comparator.LTE: "<=",
        Comparator.IN: "in",
        Comparator.NIN: "not in",
    }

    allowed_comparators: Optional[Sequence[Comparator]] = list(COMPARATOR_MAP.keys())
    allowed_operators: Optional[Sequence[Operator]] = [
        Operator.AND,
        Operator.OR,
        Operator.NOT,
    ]

    def __init__(self, meta_keys: Optional[Sequence[str]] = None):
        """Create a translator.

        Args:
            meta_keys: Attribute names that may appear in a filter. When empty
                or ``None`` no attribute check is performed.
        """
        self.meta_keys = meta_keys or []

    def visit_operation(self, operation: Operation) -> str:
        """Render a logical operation.

        ``and`` / ``or`` join every translated argument with the operator
        keyword; an ``or`` group is wrapped in parentheses. Any other operator
        is rendered as ``not (...)`` around the first argument only.
        """
        if operation.operator in (Operator.AND, Operator.OR):
            joined = f" {operation.operator.value} ".join(
                arg.accept(self) for arg in operation.arguments
            )
            if operation.operator == Operator.OR:
                # Parenthesise so the group survives being nested inside an "and".
                return f"({joined})"
            return joined
        return f"not ({operation.arguments[0].accept(self)})"

    def visit_comparison(self, comparison: Comparison) -> str:
        """Render a single comparison.

        String values are wrapped in double quotes; other values are rendered
        with ``str()``. For ``in`` / ``not in`` the value is iterated and
        rendered as a parenthesised, comma-separated list.

        Raises:
            ValueError: If ``meta_keys`` is non-empty and the attribute is not
                listed in it, or if the comparator is not in ``COMPARATOR_MAP``.
        """
        if self.meta_keys and comparison.attribute not in self.meta_keys:
            raise ValueError(
                f"Expr Filtering found Unsupported attribute: {comparison.attribute}"
            )

        if comparison.comparator not in self.COMPARATOR_MAP:
            raise ValueError(f"Unsupported comparator {comparison.comparator}")

        symbol = self.COMPARATOR_MAP[comparison.comparator]

        if comparison.comparator in (Comparator.IN, Comparator.NIN):
            # str.join only accepts strings, so non-string members (ints,
            # floats, ...) must be converted explicitly or the join raises
            # TypeError.
            members = ", ".join(
                f'"{item}"' if isinstance(item, str) else str(item)
                for item in comparison.value
            )
            return f"{comparison.attribute} {symbol} ({members})"

        if isinstance(comparison.value, str):
            return f'{comparison.attribute} {symbol} "{comparison.value}"'

        return f"{comparison.attribute} {symbol} {comparison.value}"

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        """Split a structured query into its query text and search kwargs.

        Returns:
            ``(query, kwargs)`` where ``kwargs`` is empty when the query has no
            filter, and ``{"expr": <translated filter>}`` otherwise.
        """
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"expr": structured_query.filter.accept(self)}
        return structured_query.query, kwargs
|
@@ -0,0 +1,92 @@
|
||||
from langchain.chains.query_constructor.ir import (
|
||||
Comparator,
|
||||
Comparison,
|
||||
Operation,
|
||||
Operator,
|
||||
StructuredQuery,
|
||||
)
|
||||
from langchain.retrievers.self_query.tencentvectordb import TencentVectorDBTranslator
|
||||
|
||||
|
||||
def test_translate_with_operator() -> None:
    """An AND over an OR group and a comparison keeps the OR group in parentheses."""
    by_taylor = Comparison(
        comparator=Comparator.EQ,
        attribute="artist",
        value="Taylor Swift",
    )
    by_katy = Comparison(
        comparator=Comparator.EQ,
        attribute="artist",
        value="Katy Perry",
    )
    either_artist = Operation(operator=Operator.OR, arguments=[by_taylor, by_katy])
    under_three_minutes = Comparison(
        comparator=Comparator.LT, attribute="length", value=180
    )
    structured = StructuredQuery(
        query="What are songs by Taylor Swift or Katy Perry"
        " under 3 minutes long in the dance pop genre",
        filter=Operation(
            operator=Operator.AND,
            arguments=[either_artist, under_three_minutes],
        ),
    )

    _, search_kwargs = TencentVectorDBTranslator().visit_structured_query(structured)

    assert search_kwargs["expr"] == (
        '(artist = "Taylor Swift" or artist = "Katy Perry") and length < 180'
    )
|
||||
|
||||
|
||||
def test_translate_with_in_comparison() -> None:
    """An IN comparison renders as a quoted, parenthesised member list."""
    # The filter is expressed directly as a single Comparison node.
    artist_filter = Comparison(
        comparator=Comparator.IN,
        attribute="artist",
        value=["Taylor Swift", "Katy Perry"],
    )
    structured = StructuredQuery(
        query="What are songs by Taylor Swift or Katy Perry "
        "under 3 minutes long in the dance pop genre",
        filter=artist_filter,
    )

    _, search_kwargs = TencentVectorDBTranslator().visit_structured_query(structured)

    assert search_kwargs["expr"] == 'artist in ("Taylor Swift", "Katy Perry")'
|
||||
|
||||
|
||||
def test_translate_with_allowed_fields() -> None:
    """A filter on an attribute listed in ``meta_keys`` translates normally."""
    artist_filter = Comparison(
        comparator=Comparator.IN,
        attribute="artist",
        value=["Taylor Swift", "Katy Perry"],
    )
    structured = StructuredQuery(
        query="What are songs by Taylor Swift or Katy Perry "
        "under 3 minutes long in the dance pop genre",
        filter=artist_filter,
    )
    restricted = TencentVectorDBTranslator(meta_keys=["artist"])

    _, search_kwargs = restricted.visit_structured_query(structured)

    assert search_kwargs["expr"] == 'artist in ("Taylor Swift", "Katy Perry")'
|
||||
|
||||
|
||||
def test_translate_with_unsupported_field() -> None:
    """Filtering on an attribute missing from ``meta_keys`` raises ``ValueError``."""
    query = StructuredQuery(
        query="What are songs by Taylor Swift or Katy Perry "
        "under 3 minutes long in the dance pop genre",
        filter=Comparison(
            comparator=Comparator.IN,
            attribute="artist",
            value=["Taylor Swift", "Katy Perry"],
        ),
    )
    translator = TencentVectorDBTranslator(meta_keys=["title"])
    try:
        translator.visit_structured_query(query)
    except ValueError as e:
        assert str(e) == "Expr Filtering found Unsupported attribute: artist"
    else:
        # Raise explicitly: a bare ``assert False`` is stripped under
        # ``python -O`` and gives no hint about what went wrong.
        raise AssertionError(
            "expected ValueError for an attribute that is not in meta_keys"
        )
|
Reference in New Issue
Block a user