community[patch], langchain[minor]: Enhance Tencent Cloud VectorDB, langchain: make Tencent Cloud VectorDB self query retrieve compatible (#19651)

- make Tencent Cloud VectorDB support metadata filtering.
- implement delete function for Tencent Cloud VectorDB.
- support both Langchain Embedding model and Tencent Cloud VDB embedding
model.
- Tencent Cloud VectorDB search now supports a filter keyword that is
compatible with the LangChain filtering syntax.
- add a Tencent Cloud VectorDB translation visitor, so it now works with
the self-query retriever.
- more documentation.

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
jeff kit
2024-04-10 00:50:48 +08:00
committed by GitHub
parent 1a34c65e01
commit ac42e96e4c
9 changed files with 1157 additions and 110 deletions

View File

@@ -18,6 +18,7 @@ from langchain_community.vectorstores import (
Qdrant,
Redis,
SupabaseVectorStore,
TencentVectorDB,
TimescaleVector,
Vectara,
Weaviate,
@@ -54,6 +55,7 @@ from langchain.retrievers.self_query.pinecone import PineconeTranslator
from langchain.retrievers.self_query.qdrant import QdrantTranslator
from langchain.retrievers.self_query.redis import RedisTranslator
from langchain.retrievers.self_query.supabase import SupabaseVectorTranslator
from langchain.retrievers.self_query.tencentvectordb import TencentVectorDBTranslator
from langchain.retrievers.self_query.timescalevector import TimescaleVectorTranslator
from langchain.retrievers.self_query.vectara import VectaraTranslator
from langchain.retrievers.self_query.weaviate import WeaviateTranslator
@@ -90,6 +92,11 @@ def _get_builtin_translator(vectorstore: VectorStore) -> Visitor:
return MyScaleTranslator(metadata_key=vectorstore.metadata_column)
elif isinstance(vectorstore, Redis):
return RedisTranslator.from_vectorstore(vectorstore)
elif isinstance(vectorstore, TencentVectorDB):
fields = [
field.name for field in (vectorstore.meta_fields or []) if field.index
]
return TencentVectorDBTranslator(fields)
elif vectorstore.__class__ in BUILTIN_TRANSLATORS:
return BUILTIN_TRANSLATORS[vectorstore.__class__]()
else:

View File

@@ -0,0 +1,85 @@
from __future__ import annotations
from typing import Optional, Sequence, Tuple
from langchain.chains.query_constructor.ir import (
Comparator,
Comparison,
Operation,
Operator,
StructuredQuery,
Visitor,
)
class TencentVectorDBTranslator(Visitor):
    """Render a ``StructuredQuery`` filter as a Tencent VectorDB ``expr`` string.

    Comparisons become ``<attribute> <symbol> <value>`` and are combined with
    ``and`` / ``or`` / ``not``. String values are wrapped in double quotes;
    every other value is rendered with ``str()``.
    """

    # Maps each supported IR comparator to the symbol used in the expression.
    COMPARATOR_MAP = {
        Comparator.EQ: "=",
        Comparator.NE: "!=",
        Comparator.GT: ">",
        Comparator.GTE: ">=",
        Comparator.LT: "<",
        Comparator.LTE: "<=",
        Comparator.IN: "in",
        Comparator.NIN: "not in",
    }

    # Comparators this translator can render (exactly the keys mapped above).
    allowed_comparators: Optional[Sequence[Comparator]] = list(COMPARATOR_MAP.keys())
    # Logical operators this translator can render.
    allowed_operators: Optional[Sequence[Operator]] = [
        Operator.AND,
        Operator.OR,
        Operator.NOT,
    ]

    def __init__(self, meta_keys: Optional[Sequence[str]] = None):
        """Create a translator.

        Args:
            meta_keys: Attribute names that may appear in a filter. When empty
                or ``None``, attribute names are not validated.
        """
        self.meta_keys = meta_keys or []

    @staticmethod
    def _format_value(value: object) -> str:
        """Render one literal: strings are double-quoted, the rest use ``str()``."""
        if isinstance(value, str):
            return f'"{value}"'
        return str(value)

    def visit_operation(self, operation: Operation) -> str:
        """Render a logical operation over its already-rendered arguments.

        ``and`` / ``or`` join every argument; an ``or`` group is parenthesised
        so it keeps its grouping when nested inside an ``and``. Any other
        operator is rendered as ``not (...)`` over the first argument.
        """
        if operation.operator in (Operator.AND, Operator.OR):
            ret = f" {operation.operator.value} ".join(
                [arg.accept(self) for arg in operation.arguments]
            )
            if operation.operator == Operator.OR:
                ret = f"({ret})"
            return ret
        return f"not ({operation.arguments[0].accept(self)})"

    def visit_comparison(self, comparison: Comparison) -> str:
        """Render a single comparison.

        Raises:
            ValueError: If ``meta_keys`` is non-empty and the attribute is not
                listed in it, or if the comparator has no mapped symbol.
        """
        if self.meta_keys and comparison.attribute not in self.meta_keys:
            raise ValueError(
                f"Expr Filtering found Unsupported attribute: {comparison.attribute}"
            )
        if comparison.comparator not in self.COMPARATOR_MAP:
            raise ValueError(f"Unsupported comparator {comparison.comparator}")

        symbol = self.COMPARATOR_MAP[comparison.comparator]
        if comparison.comparator in (Comparator.IN, Comparator.NIN):
            members = comparison.value
            # A scalar (notably a bare string) would otherwise be iterated
            # element by element; treat it as a one-item membership list.
            if not isinstance(members, (list, tuple)):
                members = [members]
            # Every member must be a string before joining; non-string
            # values such as ints previously made ``str.join`` raise TypeError.
            rendered = ", ".join(self._format_value(item) for item in members)
            return f"{comparison.attribute} {symbol} ({rendered})"
        return f"{comparison.attribute} {symbol} {self._format_value(comparison.value)}"

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        """Return the query text and the search kwargs for the filter.

        The kwargs are empty when the query has no filter; otherwise they hold
        the rendered expression under the ``"expr"`` key.
        """
        if structured_query.filter is None:
            kwargs = {}
        else:
            kwargs = {"expr": structured_query.filter.accept(self)}
        return structured_query.query, kwargs

View File

@@ -0,0 +1,92 @@
from langchain.chains.query_constructor.ir import (
Comparator,
Comparison,
Operation,
Operator,
StructuredQuery,
)
from langchain.retrievers.self_query.tencentvectordb import TencentVectorDBTranslator
def test_translate_with_operator() -> None:
    """An AND over a nested OR and a numeric comparison yields one expression."""
    either_artist = Operation(
        operator=Operator.OR,
        arguments=[
            Comparison(
                comparator=Comparator.EQ,
                attribute="artist",
                value="Taylor Swift",
            ),
            Comparison(
                comparator=Comparator.EQ,
                attribute="artist",
                value="Katy Perry",
            ),
        ],
    )
    under_three_minutes = Comparison(
        comparator=Comparator.LT, attribute="length", value=180
    )
    structured = StructuredQuery(
        query="What are songs by Taylor Swift or Katy Perry"
        " under 3 minutes long in the dance pop genre",
        filter=Operation(
            operator=Operator.AND,
            arguments=[either_artist, under_three_minutes],
        ),
    )

    _, search_kwargs = TencentVectorDBTranslator().visit_structured_query(structured)

    expected = '(artist = "Taylor Swift" or artist = "Katy Perry") and length < 180'
    assert search_kwargs["expr"] == expected
def test_translate_with_in_comparison() -> None:
    """The same artist filter, written as a single IN comparison."""
    artist_membership = Comparison(
        comparator=Comparator.IN,
        attribute="artist",
        value=["Taylor Swift", "Katy Perry"],
    )
    structured = StructuredQuery(
        query="What are songs by Taylor Swift or Katy Perry "
        "under 3 minutes long in the dance pop genre",
        filter=artist_membership,
    )

    _, search_kwargs = TencentVectorDBTranslator().visit_structured_query(structured)

    assert search_kwargs["expr"] == 'artist in ("Taylor Swift", "Katy Perry")'
def test_translate_with_allowed_fields() -> None:
    """A filter on an attribute listed in ``meta_keys`` is translated normally."""
    artist_membership = Comparison(
        comparator=Comparator.IN,
        attribute="artist",
        value=["Taylor Swift", "Katy Perry"],
    )
    structured = StructuredQuery(
        query="What are songs by Taylor Swift or Katy Perry "
        "under 3 minutes long in the dance pop genre",
        filter=artist_membership,
    )
    restricted = TencentVectorDBTranslator(meta_keys=["artist"])

    _, search_kwargs = restricted.visit_structured_query(structured)

    assert search_kwargs["expr"] == 'artist in ("Taylor Swift", "Katy Perry")'
def test_translate_with_unsupported_field() -> None:
    """A filter on an attribute missing from ``meta_keys`` raises ValueError."""
    artist_membership = Comparison(
        comparator=Comparator.IN,
        attribute="artist",
        value=["Taylor Swift", "Katy Perry"],
    )
    structured = StructuredQuery(
        query="What are songs by Taylor Swift or Katy Perry "
        "under 3 minutes long in the dance pop genre",
        filter=artist_membership,
    )
    restricted = TencentVectorDBTranslator(meta_keys=["title"])

    # Capture the error first so that "nothing was raised" also fails the test.
    raised = None
    try:
        restricted.visit_structured_query(structured)
    except ValueError as err:
        raised = err

    assert raised is not None
    assert str(raised) == "Expr Filtering found Unsupported attribute: artist"