mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 20:58:25 +00:00
Improve handling of empty queries for timescale vector (#12393)
**Description:** Improve handling of empty queries in timescale-vector. When a query is empty (`None`, an empty string, or only whitespace), its embedding has no semantic meaning, and it is more efficient for timescale-vector to receive a `None` embedding instead, because that allows it to perform more optimizations. Thus, when the query is empty, the vector store now skips the embedding call and uses a `None` embedding. This change also passes the extra constructor keyword arguments (`**kwargs`) down to the timescale-vector client. Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
38cee5fae0
commit
11505f95d3
@ -79,6 +79,7 @@ class TimescaleVector(VectorStore):
|
|||||||
logger: Optional[logging.Logger] = None,
|
logger: Optional[logging.Logger] = None,
|
||||||
relevance_score_fn: Optional[Callable[[float], float]] = None,
|
relevance_score_fn: Optional[Callable[[float], float]] = None,
|
||||||
time_partition_interval: Optional[timedelta] = None,
|
time_partition_interval: Optional[timedelta] = None,
|
||||||
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
try:
|
try:
|
||||||
from timescale_vector import client
|
from timescale_vector import client
|
||||||
@ -103,6 +104,7 @@ class TimescaleVector(VectorStore):
|
|||||||
self.num_dimensions,
|
self.num_dimensions,
|
||||||
self._distance_strategy.value.lower(),
|
self._distance_strategy.value.lower(),
|
||||||
time_partition_interval=self._time_partition_interval,
|
time_partition_interval=self._time_partition_interval,
|
||||||
|
**kwargs,
|
||||||
)
|
)
|
||||||
self.async_client = client.Async(
|
self.async_client = client.Async(
|
||||||
self.service_url,
|
self.service_url,
|
||||||
@ -110,6 +112,7 @@ class TimescaleVector(VectorStore):
|
|||||||
self.num_dimensions,
|
self.num_dimensions,
|
||||||
self._distance_strategy.value.lower(),
|
self._distance_strategy.value.lower(),
|
||||||
time_partition_interval=self._time_partition_interval,
|
time_partition_interval=self._time_partition_interval,
|
||||||
|
**kwargs,
|
||||||
)
|
)
|
||||||
self.__post_init__()
|
self.__post_init__()
|
||||||
|
|
||||||
@ -310,6 +313,13 @@ class TimescaleVector(VectorStore):
|
|||||||
texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
|
texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _embed_query(self, query: str) -> Optional[List[float]]:
|
||||||
|
# an empty query should not be embedded
|
||||||
|
if query is None or query == "" or query.isspace():
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return self.embedding.embed_query(query)
|
||||||
|
|
||||||
def similarity_search(
|
def similarity_search(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
@ -328,7 +338,7 @@ class TimescaleVector(VectorStore):
|
|||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query.
|
List of Documents most similar to the query.
|
||||||
"""
|
"""
|
||||||
embedding = self.embedding.embed_query(text=query)
|
embedding = self._embed_query(query)
|
||||||
return self.similarity_search_by_vector(
|
return self.similarity_search_by_vector(
|
||||||
embedding=embedding,
|
embedding=embedding,
|
||||||
k=k,
|
k=k,
|
||||||
@ -355,7 +365,7 @@ class TimescaleVector(VectorStore):
|
|||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query.
|
List of Documents most similar to the query.
|
||||||
"""
|
"""
|
||||||
embedding = self.embedding.embed_query(text=query)
|
embedding = self._embed_query(query)
|
||||||
return await self.asimilarity_search_by_vector(
|
return await self.asimilarity_search_by_vector(
|
||||||
embedding=embedding,
|
embedding=embedding,
|
||||||
k=k,
|
k=k,
|
||||||
@ -382,7 +392,7 @@ class TimescaleVector(VectorStore):
|
|||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query and score for each
|
List of Documents most similar to the query and score for each
|
||||||
"""
|
"""
|
||||||
embedding = self.embedding.embed_query(query)
|
embedding = self._embed_query(query)
|
||||||
docs = self.similarity_search_with_score_by_vector(
|
docs = self.similarity_search_with_score_by_vector(
|
||||||
embedding=embedding,
|
embedding=embedding,
|
||||||
k=k,
|
k=k,
|
||||||
@ -410,7 +420,8 @@ class TimescaleVector(VectorStore):
|
|||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query and score for each
|
List of Documents most similar to the query and score for each
|
||||||
"""
|
"""
|
||||||
embedding = self.embedding.embed_query(query)
|
|
||||||
|
embedding = self._embed_query(query)
|
||||||
return await self.asimilarity_search_with_score_by_vector(
|
return await self.asimilarity_search_with_score_by_vector(
|
||||||
embedding=embedding,
|
embedding=embedding,
|
||||||
k=k,
|
k=k,
|
||||||
@ -445,7 +456,7 @@ class TimescaleVector(VectorStore):
|
|||||||
|
|
||||||
def similarity_search_with_score_by_vector(
|
def similarity_search_with_score_by_vector(
|
||||||
self,
|
self,
|
||||||
embedding: List[float],
|
embedding: Optional[List[float]],
|
||||||
k: int = 4,
|
k: int = 4,
|
||||||
filter: Optional[Union[dict, list]] = None,
|
filter: Optional[Union[dict, list]] = None,
|
||||||
predicates: Optional[Predicates] = None,
|
predicates: Optional[Predicates] = None,
|
||||||
@ -481,7 +492,7 @@ class TimescaleVector(VectorStore):
|
|||||||
|
|
||||||
async def asimilarity_search_with_score_by_vector(
|
async def asimilarity_search_with_score_by_vector(
|
||||||
self,
|
self,
|
||||||
embedding: List[float],
|
embedding: Optional[List[float]],
|
||||||
k: int = 4,
|
k: int = 4,
|
||||||
filter: Optional[Union[dict, list]] = None,
|
filter: Optional[Union[dict, list]] = None,
|
||||||
predicates: Optional[Predicates] = None,
|
predicates: Optional[Predicates] = None,
|
||||||
@ -517,7 +528,7 @@ class TimescaleVector(VectorStore):
|
|||||||
|
|
||||||
def similarity_search_by_vector(
|
def similarity_search_by_vector(
|
||||||
self,
|
self,
|
||||||
embedding: List[float],
|
embedding: Optional[List[float]],
|
||||||
k: int = 4,
|
k: int = 4,
|
||||||
filter: Optional[Union[dict, list]] = None,
|
filter: Optional[Union[dict, list]] = None,
|
||||||
predicates: Optional[Predicates] = None,
|
predicates: Optional[Predicates] = None,
|
||||||
@ -540,7 +551,7 @@ class TimescaleVector(VectorStore):
|
|||||||
|
|
||||||
async def asimilarity_search_by_vector(
|
async def asimilarity_search_by_vector(
|
||||||
self,
|
self,
|
||||||
embedding: List[float],
|
embedding: Optional[List[float]],
|
||||||
k: int = 4,
|
k: int = 4,
|
||||||
filter: Optional[Union[dict, list]] = None,
|
filter: Optional[Union[dict, list]] = None,
|
||||||
predicates: Optional[Predicates] = None,
|
predicates: Optional[Predicates] = None,
|
||||||
|
Loading…
Reference in New Issue
Block a user