mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-09 21:08:59 +00:00
Added more filtering options to pgvector vectorstore (#14852)
- **Description:** With the PGVector vector store, metadata could previously be filtered only by equality, `in`, or `not in`. This change extends filtering to support the following keywords: IN, NIN, BETWEEN, GT, LT, NE, EQ, LIKE, CONTAINS, OR, AND --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
dfd7b9edda
commit
d006be60ec
@ -485,6 +485,66 @@ class PGVector(VectorStore):
|
||||
]
|
||||
return docs
|
||||
|
||||
def _create_filter_clause(self, key, value):
    """Build the filter clause for a single metadata key.

    Args:
        key: Name of the metadata field to filter on.
        value: Mapping of operator name to operand. Operator names are
            matched case-insensitively. Recognised operators, in order of
            precedence when several are present: ``in``, ``nin``,
            ``between``, ``gt``, ``lt``, ``ne``, ``eq``, ``like``,
            ``contains``, ``or``, ``and``. Only the first one found is
            used. For ``between`` the operand is indexed at ``[0]`` and
            ``[1]``; for ``or`` / ``and`` it is an iterable of nested
            operator mappings applied to the same ``key``.

    Returns:
        The clause for the first recognised operator, or ``None`` when no
        operator is recognised (or when an ``or`` / ``and`` group yields no
        usable sub-clause).

    Raises:
        AttributeError: If ``value`` has no ``items()`` or one of its keys
            has no ``lower()``.
    """
    IN, NIN, BETWEEN, GT, LT, NE = "in", "nin", "between", "gt", "lt", "ne"
    EQ, LIKE, CONTAINS, OR, AND = "eq", "like", "contains", "or", "and"

    # Normalise operator names once so "IN", "In" and "in" are equivalent.
    operators = {k.lower(): v for k, v in value.items()}

    # Every comparison below goes through ``.astext`` and most operands are
    # cast with ``str()``.
    # NOTE(review): this presumably makes gt / lt / between compare as text
    # (lexicographically) rather than numerically -- confirm against the
    # column type of ``cmetadata``.
    column = self.EmbeddingStore.cmetadata[key].astext

    if IN in operators:
        return column.in_(operators[IN])
    if NIN in operators:
        return column.not_in(operators[NIN])
    if BETWEEN in operators:
        bounds = operators[BETWEEN]
        return column.between(str(bounds[0]), str(bounds[1]))
    if GT in operators:
        return column > str(operators[GT])
    if LT in operators:
        return column < str(operators[LT])
    if NE in operators:
        return column != str(operators[NE])
    if EQ in operators:
        return column == str(operators[EQ])
    if LIKE in operators:
        return column.like(operators[LIKE])
    if CONTAINS in operators:
        return column.contains(operators[CONTAINS])

    if OR in operators:
        combine, sub_filters = sqlalchemy.or_, operators[OR]
    elif AND in operators:
        combine, sub_filters = sqlalchemy.and_, operators[AND]
    else:
        return None

    # A nested filter with no recognised operator yields None; drop those so
    # they are ignored rather than handed to the conjunction.
    clauses = [
        clause
        for clause in (
            self._create_filter_clause(key, sub_filter)
            for sub_filter in sub_filters
        )
        if clause is not None
    ]
    if not clauses:
        return None
    # or_() / and_() take the clauses as separate positional arguments, so
    # the list must be unpacked rather than passed as one argument.
    return combine(*clauses)
|
||||
|
||||
def __query_collection(
|
||||
self,
|
||||
embedding: List[float],
|
||||
@ -501,22 +561,11 @@ class PGVector(VectorStore):
|
||||
|
||||
if filter is not None:
|
||||
filter_clauses = []
|
||||
IN, NIN = "in", "nin"
|
||||
|
||||
for key, value in filter.items():
|
||||
if isinstance(value, dict):
|
||||
value_case_insensitive = {
|
||||
k.lower(): v for k, v in value.items()
|
||||
}
|
||||
if IN in map(str.lower, value):
|
||||
filter_by_metadata = self.EmbeddingStore.cmetadata[
|
||||
key
|
||||
].astext.in_(value_case_insensitive[IN])
|
||||
elif NIN in map(str.lower, value):
|
||||
filter_by_metadata = self.EmbeddingStore.cmetadata[
|
||||
key
|
||||
].astext.not_in(value_case_insensitive[NIN])
|
||||
else:
|
||||
filter_by_metadata = None
|
||||
filter_by_metadata = self._create_filter_clause(key, value)
|
||||
|
||||
if filter_by_metadata is not None:
|
||||
filter_clauses.append(filter_by_metadata)
|
||||
else:
|
||||
|
Loading…
Reference in New Issue
Block a user