mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-13 14:50:00 +00:00
Added more filtering options to pgvector vectorstore (#14852)
- **Description:** With the PGVector vector store, it was previously only possible to filter metadata values by equality, `in`, or `not in`. This change extends filtering to support the following keywords: IN, NIN, BETWEEN, GT, LT, NE, EQ, LIKE, CONTAINS, OR, AND. --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
dfd7b9edda
commit
d006be60ec
@ -485,6 +485,66 @@ class PGVector(VectorStore):
|
|||||||
]
|
]
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
def _create_filter_clause(self, key, value):
    """Build a SQLAlchemy filter expression for one metadata key.

    Args:
        key: Name of the metadata field, used to index
            ``self.EmbeddingStore.cmetadata``.
        value: Mapping of operator name to operand. Operator names are
            matched case-insensitively. Recognised operators are
            ``in``, ``nin``, ``between``, ``gt``, ``lt``, ``ne``, ``eq``,
            ``like``, ``contains``, ``or`` and ``and``. For ``between`` the
            operand is indexed at ``[0]`` and ``[1]`` (lower and upper
            bound). For ``or`` / ``and`` the operand is an iterable of
            nested operator mappings that are applied to the same ``key``.

    Returns:
        The filter expression for the first recognised operator, checked in
        the order listed above, or ``None`` when no recognised operator is
        present (or when an ``or`` / ``and`` group yields no usable
        sub-clause).

    Note:
        Only one operator per mapping is honoured; to combine several
        conditions use ``or`` / ``and``. All comparisons are made against
        the ``astext`` form of the metadata value, and the operands of
        ``between``, ``gt``, ``lt``, ``ne`` and ``eq`` are converted with
        ``str()``, so ordering comparisons are textual rather than numeric.
    """
    IN, NIN, BETWEEN, GT, LT, NE = "in", "nin", "between", "gt", "lt", "ne"
    EQ, LIKE, CONTAINS, OR, AND = "eq", "like", "contains", "or", "and"

    # Normalise operator names once so every lookup below is a dict test.
    operators = {k.lower(): v for k, v in value.items()}
    column = self.EmbeddingStore.cmetadata[key].astext

    if IN in operators:
        return column.in_(operators[IN])
    if NIN in operators:
        return column.not_in(operators[NIN])
    if BETWEEN in operators:
        bounds = operators[BETWEEN]
        return column.between(str(bounds[0]), str(bounds[1]))
    if GT in operators:
        return column > str(operators[GT])
    if LT in operators:
        return column < str(operators[LT])
    if NE in operators:
        return column != str(operators[NE])
    if EQ in operators:
        return column == str(operators[EQ])
    if LIKE in operators:
        return column.like(operators[LIKE])
    if CONTAINS in operators:
        return column.contains(operators[CONTAINS])

    for name, combine in ((OR, sqlalchemy.or_), (AND, sqlalchemy.and_)):
        if name not in operators:
            continue
        # Unrecognised nested operators produce None; skip them the same
        # way the caller skips a None top-level clause.
        sub_clauses = [
            clause
            for clause in (
                self._create_filter_clause(key, sub_value)
                for sub_value in operators[name]
            )
            if clause is not None
        ]
        if not sub_clauses:
            return None
        # or_() / and_() take the clauses as separate positional
        # arguments, so the list has to be unpacked.
        return combine(*sub_clauses)

    return None
|
||||||
|
|
||||||
def __query_collection(
|
def __query_collection(
|
||||||
self,
|
self,
|
||||||
embedding: List[float],
|
embedding: List[float],
|
||||||
@ -501,22 +561,11 @@ class PGVector(VectorStore):
|
|||||||
|
|
||||||
if filter is not None:
|
if filter is not None:
|
||||||
filter_clauses = []
|
filter_clauses = []
|
||||||
IN, NIN = "in", "nin"
|
|
||||||
for key, value in filter.items():
|
for key, value in filter.items():
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
value_case_insensitive = {
|
filter_by_metadata = self._create_filter_clause(key, value)
|
||||||
k.lower(): v for k, v in value.items()
|
|
||||||
}
|
|
||||||
if IN in map(str.lower, value):
|
|
||||||
filter_by_metadata = self.EmbeddingStore.cmetadata[
|
|
||||||
key
|
|
||||||
].astext.in_(value_case_insensitive[IN])
|
|
||||||
elif NIN in map(str.lower, value):
|
|
||||||
filter_by_metadata = self.EmbeddingStore.cmetadata[
|
|
||||||
key
|
|
||||||
].astext.not_in(value_case_insensitive[NIN])
|
|
||||||
else:
|
|
||||||
filter_by_metadata = None
|
|
||||||
if filter_by_metadata is not None:
|
if filter_by_metadata is not None:
|
||||||
filter_clauses.append(filter_by_metadata)
|
filter_clauses.append(filter_by_metadata)
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user