mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-20 22:03:52 +00:00
Readded similarity_search_by_vector (#1568)
I am redoing this PR because I mistakenly merged the latest changes into my fork's branch, which added a bunch of unrelated commits to my previous PR. Sorry about that. This fixes #1451.
This commit is contained in:
parent
c1dc784a3d
commit
6ed16e13b1
@ -16,6 +16,23 @@ if TYPE_CHECKING:
|
|||||||
logger = logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
|
||||||
|
def _results_to_docs(results: Any) -> List[Document]:
    """Return only the documents from a query result, dropping the scores.

    Args:
        results: Query result, passed unchanged to
            ``_results_to_docs_and_scores``.

    Returns:
        The documents of the scored pairs, in the same order.
    """
    scored_pairs = _results_to_docs_and_scores(results)
    return [document for document, _score in scored_pairs]
|
||||||
|
|
||||||
|
|
||||||
|
def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
    """Pair each document of the first query result with its distance.

    Args:
        results: Mapping with ``"documents"``, ``"metadatas"`` and
            ``"distances"`` entries. Only index 0 of each entry is read.

    Returns:
        ``(Document, distance)`` tuples built by zipping the three entries
        together, so the output is as long as the shortest of them.
    """
    # TODO: Chroma can do batch querying,
    # we shouldn't hard code to the 1st result
    texts = results["documents"][0]
    metadatas = results["metadatas"][0]
    distances = results["distances"][0]
    return [
        (Document(page_content=text, metadata=metadata), distance)
        for text, metadata, distance in zip(texts, metadatas, distances)
    ]
|
||||||
|
|
||||||
|
|
||||||
class Chroma(VectorStore):
|
class Chroma(VectorStore):
|
||||||
"""Wrapper around ChromaDB embeddings platform.
|
"""Wrapper around ChromaDB embeddings platform.
|
||||||
|
|
||||||
@ -126,6 +143,22 @@ class Chroma(VectorStore):
|
|||||||
docs_and_scores = self.similarity_search_with_score(query, k)
|
docs_and_scores = self.similarity_search_with_score(query, k)
|
||||||
return [doc for doc, _ in docs_and_scores]
|
return [doc for doc, _ in docs_and_scores]
|
||||||
|
|
||||||
|
def similarity_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    **kwargs: Any,
) -> List[Document]:
    """Look up the documents closest to a precomputed embedding vector.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        **kwargs: Accepted but not forwarded to the collection query.

    Returns:
        List of Documents most similar to the query vector.
    """
    query_result = self._collection.query(
        query_embeddings=embedding,
        n_results=k,
    )
    return _results_to_docs(query_result)
|
||||||
|
|
||||||
def similarity_search_with_score(
|
def similarity_search_with_score(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
@ -154,17 +187,7 @@ class Chroma(VectorStore):
|
|||||||
query_embeddings=[query_embedding], n_results=k, where=filter
|
query_embeddings=[query_embedding], n_results=k, where=filter
|
||||||
)
|
)
|
||||||
|
|
||||||
docs = [
|
return _results_to_docs_and_scores(results)
|
||||||
# TODO: Chroma can do batch querying,
|
|
||||||
# we shouldn't hard code to the 1st result
|
|
||||||
(Document(page_content=result[0], metadata=result[1]), result[2])
|
|
||||||
for result in zip(
|
|
||||||
results["documents"][0],
|
|
||||||
results["metadatas"][0],
|
|
||||||
results["distances"][0],
|
|
||||||
)
|
|
||||||
]
|
|
||||||
return docs
|
|
||||||
|
|
||||||
def delete_collection(self) -> None:
|
def delete_collection(self) -> None:
|
||||||
"""Delete the collection."""
|
"""Delete the collection."""
|
||||||
|
Loading…
Reference in New Issue
Block a user