mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-01 19:03:25 +00:00
community[patch]: add possibility to search by vector in OpenSearchVectorSearch (#17878)
- **Description:** Implements the missing `similarity_search_by_vector` function for `OpenSearchVectorSearch`.
- **Issue:** N/A
- **Dependencies:** N/A
This commit is contained in:
parent
144f59b5fe
commit
afc1ba0329
@ -516,6 +516,15 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
docs_with_scores = self.similarity_search_with_score(query, k, **kwargs)
|
docs_with_scores = self.similarity_search_with_score(query, k, **kwargs)
|
||||||
return [doc[0] for doc in docs_with_scores]
|
return [doc[0] for doc in docs_with_scores]
|
||||||
|
|
||||||
|
def similarity_search_by_vector(
|
||||||
|
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||||
|
) -> List[Document]:
|
||||||
|
"""Return docs most similar to the embedding vector."""
|
||||||
|
docs_with_scores = self.similarity_search_with_score_by_vector(
|
||||||
|
embedding, k, **kwargs
|
||||||
|
)
|
||||||
|
return [doc[0] for doc in docs_with_scores]
|
||||||
|
|
||||||
def similarity_search_with_score(
|
def similarity_search_with_score(
|
||||||
self, query: str, k: int = 4, **kwargs: Any
|
self, query: str, k: int = 4, **kwargs: Any
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
@ -534,19 +543,43 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
Optional Args:
|
Optional Args:
|
||||||
same as `similarity_search`
|
same as `similarity_search`
|
||||||
"""
|
"""
|
||||||
|
embedding = self.embedding_function.embed_query(query)
|
||||||
|
return self.similarity_search_with_score_by_vector(embedding, k, **kwargs)
|
||||||
|
|
||||||
|
def similarity_search_with_score_by_vector(
|
||||||
|
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||||
|
) -> List[Tuple[Document, float]]:
|
||||||
|
"""Return docs and their scores most similar to the embedding vector.
|
||||||
|
|
||||||
|
By default, supports Approximate Search.
|
||||||
|
Also supports Script Scoring and Painless Scripting.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
embedding: Embedding vector to look up documents similar to.
|
||||||
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Documents along with their scores most similar to the embedding vector.
|
||||||
|
|
||||||
|
Optional Args:
|
||||||
|
same as `similarity_search`
|
||||||
|
"""
|
||||||
text_field = kwargs.get("text_field", "text")
|
text_field = kwargs.get("text_field", "text")
|
||||||
metadata_field = kwargs.get("metadata_field", "metadata")
|
metadata_field = kwargs.get("metadata_field", "metadata")
|
||||||
|
|
||||||
hits = self._raw_similarity_search_with_score(query=query, k=k, **kwargs)
|
hits = self._raw_similarity_search_with_score_by_vector(
|
||||||
|
embedding=embedding, k=k, **kwargs
|
||||||
|
)
|
||||||
|
|
||||||
documents_with_scores = [
|
documents_with_scores = [
|
||||||
(
|
(
|
||||||
Document(
|
Document(
|
||||||
page_content=hit["_source"][text_field],
|
page_content=hit["_source"][text_field],
|
||||||
metadata=hit["_source"]
|
metadata=(
|
||||||
|
hit["_source"]
|
||||||
if metadata_field == "*" or metadata_field not in hit["_source"]
|
if metadata_field == "*" or metadata_field not in hit["_source"]
|
||||||
else hit["_source"][metadata_field],
|
else hit["_source"][metadata_field]
|
||||||
|
),
|
||||||
),
|
),
|
||||||
hit["_score"],
|
hit["_score"],
|
||||||
)
|
)
|
||||||
@ -554,26 +587,25 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
]
|
]
|
||||||
return documents_with_scores
|
return documents_with_scores
|
||||||
|
|
||||||
def _raw_similarity_search_with_score(
|
def _raw_similarity_search_with_score_by_vector(
|
||||||
self, query: str, k: int = 4, **kwargs: Any
|
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||||
) -> List[dict]:
|
) -> List[dict]:
|
||||||
"""Return raw opensearch documents (dict) including vectors,
|
"""Return raw opensearch documents (dict) including vectors,
|
||||||
scores most similar to query.
|
scores most similar to the embedding vector.
|
||||||
|
|
||||||
By default, supports Approximate Search.
|
By default, supports Approximate Search.
|
||||||
Also supports Script Scoring and Painless Scripting.
|
Also supports Script Scoring and Painless Scripting.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: Text to look up documents similar to.
|
embedding: Embedding vector to look up documents similar to.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of dict with its scores most similar to the query.
|
List of dict with its scores most similar to the embedding.
|
||||||
|
|
||||||
Optional Args:
|
Optional Args:
|
||||||
same as `similarity_search`
|
same as `similarity_search`
|
||||||
"""
|
"""
|
||||||
embedding = self.embedding_function.embed_query(query)
|
|
||||||
search_type = kwargs.get("search_type", "approximate_search")
|
search_type = kwargs.get("search_type", "approximate_search")
|
||||||
vector_field = kwargs.get("vector_field", "vector_field")
|
vector_field = kwargs.get("vector_field", "vector_field")
|
||||||
index_name = kwargs.get("index_name", self.index_name)
|
index_name = kwargs.get("index_name", self.index_name)
|
||||||
@ -702,7 +734,9 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
embedding = self.embedding_function.embed_query(query)
|
embedding = self.embedding_function.embed_query(query)
|
||||||
|
|
||||||
# Do ANN/KNN search to get top fetch_k results where fetch_k >= k
|
# Do ANN/KNN search to get top fetch_k results where fetch_k >= k
|
||||||
results = self._raw_similarity_search_with_score(query, fetch_k, **kwargs)
|
results = self._raw_similarity_search_with_score_by_vector(
|
||||||
|
embedding, fetch_k, **kwargs
|
||||||
|
)
|
||||||
|
|
||||||
embeddings = [result["_source"][vector_field] for result in results]
|
embeddings = [result["_source"][vector_field] for result in results]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user