diff --git a/libs/community/langchain_community/vectorstores/opensearch_vector_search.py b/libs/community/langchain_community/vectorstores/opensearch_vector_search.py index 99ece00e7a6..cf56c500e92 100644 --- a/libs/community/langchain_community/vectorstores/opensearch_vector_search.py +++ b/libs/community/langchain_community/vectorstores/opensearch_vector_search.py @@ -516,6 +516,15 @@ class OpenSearchVectorSearch(VectorStore): docs_with_scores = self.similarity_search_with_score(query, k, **kwargs) return [doc[0] for doc in docs_with_scores] + def similarity_search_by_vector( + self, embedding: List[float], k: int = 4, **kwargs: Any + ) -> List[Document]: + """Return docs most similar to the embedding vector.""" + docs_with_scores = self.similarity_search_with_score_by_vector( + embedding, k, **kwargs + ) + return [doc[0] for doc in docs_with_scores] + def similarity_search_with_score( self, query: str, k: int = 4, **kwargs: Any ) -> List[Tuple[Document, float]]: @@ -534,19 +543,43 @@ class OpenSearchVectorSearch(VectorStore): Optional Args: same as `similarity_search` """ + embedding = self.embedding_function.embed_query(query) + return self.similarity_search_with_score_by_vector(embedding, k, **kwargs) + def similarity_search_with_score_by_vector( + self, embedding: List[float], k: int = 4, **kwargs: Any + ) -> List[Tuple[Document, float]]: + """Return docs and it's scores most similar to the embedding vector. + + By default, supports Approximate Search. + Also supports Script Scoring and Painless Scripting. + + Args: + embedding: Embedding vector to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + + Returns: + List of Documents along with its scores most similar to the query. + + Optional Args: + same as `similarity_search` + """ text_field = kwargs.get("text_field", "text") metadata_field = kwargs.get("metadata_field", "metadata") - hits = self._raw_similarity_search_with_score(query=query, k=k, **kwargs) + hits = self._raw_similarity_search_with_score_by_vector( + embedding=embedding, k=k, **kwargs + ) documents_with_scores = [ ( Document( page_content=hit["_source"][text_field], - metadata=hit["_source"] - if metadata_field == "*" or metadata_field not in hit["_source"] - else hit["_source"][metadata_field], + metadata=( + hit["_source"] + if metadata_field == "*" or metadata_field not in hit["_source"] + else hit["_source"][metadata_field] + ), ), hit["_score"], ) @@ -554,26 +587,25 @@ class OpenSearchVectorSearch(VectorStore): ] return documents_with_scores - def _raw_similarity_search_with_score( - self, query: str, k: int = 4, **kwargs: Any + def _raw_similarity_search_with_score_by_vector( + self, embedding: List[float], k: int = 4, **kwargs: Any ) -> List[dict]: """Return raw opensearch documents (dict) including vectors, - scores most similar to query. + scores most similar to the embedding vector. By default, supports Approximate Search. Also supports Script Scoring and Painless Scripting. Args: - query: Text to look up documents similar to. + embedding: Embedding vector to look up documents similar to. k: Number of Documents to return. Defaults to 4. Returns: - List of dict with its scores most similar to the query. + List of dict with its scores most similar to the embedding. Optional Args: same as `similarity_search` """ - embedding = self.embedding_function.embed_query(query) search_type = kwargs.get("search_type", "approximate_search") vector_field = kwargs.get("vector_field", "vector_field") index_name = kwargs.get("index_name", self.index_name) @@ -702,7 +734,9 @@ class OpenSearchVectorSearch(VectorStore): embedding = self.embedding_function.embed_query(query) # Do ANN/KNN search to get top fetch_k results where fetch_k >= k - results = self._raw_similarity_search_with_score(query, fetch_k, **kwargs) + results = self._raw_similarity_search_with_score_by_vector( + embedding, fetch_k, **kwargs + ) embeddings = [result["_source"][vector_field] for result in results]