mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-19 13:23:35 +00:00
core[patch]: Improve VectorStore API doc (#22547)
This commit is contained in:
parent
89128b7a49
commit
c34ad8c163
@ -71,7 +71,7 @@ class VectorStore(ABC):
|
|||||||
Args:
|
Args:
|
||||||
texts: Iterable of strings to add to the vectorstore.
|
texts: Iterable of strings to add to the vectorstore.
|
||||||
metadatas: Optional list of metadatas associated with the texts.
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
kwargs: vectorstore specific parameters
|
**kwargs: vectorstore specific parameters.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of ids from adding the texts into the vectorstore.
|
List of ids from adding the texts into the vectorstore.
|
||||||
@ -120,17 +120,26 @@ class VectorStore(ABC):
|
|||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""Run more texts through the embeddings and add to the vectorstore."""
|
"""Run more texts through the embeddings and add to the vectorstore.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
texts: Iterable of strings to add to the vectorstore.
|
||||||
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
|
**kwargs: vectorstore specific parameters.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of ids from adding the texts into the vectorstore.
|
||||||
|
"""
|
||||||
return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)
|
return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)
|
||||||
|
|
||||||
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
|
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
|
||||||
"""Run more documents through the embeddings and add to the vectorstore.
|
"""Run more documents through the embeddings and add to the vectorstore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
documents (List[Document]: Documents to add to the vectorstore.
|
documents: Documents to add to the vectorstore.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[str]: List of IDs of the added texts.
|
List of IDs of the added texts.
|
||||||
"""
|
"""
|
||||||
# TODO: Handle the case where the user doesn't provide ids on the Collection
|
# TODO: Handle the case where the user doesn't provide ids on the Collection
|
||||||
texts = [doc.page_content for doc in documents]
|
texts = [doc.page_content for doc in documents]
|
||||||
@ -143,17 +152,24 @@ class VectorStore(ABC):
|
|||||||
"""Run more documents through the embeddings and add to the vectorstore.
|
"""Run more documents through the embeddings and add to the vectorstore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
documents (List[Document]: Documents to add to the vectorstore.
|
documents: Documents to add to the vectorstore.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[str]: List of IDs of the added texts.
|
List of IDs of the added texts.
|
||||||
"""
|
"""
|
||||||
texts = [doc.page_content for doc in documents]
|
texts = [doc.page_content for doc in documents]
|
||||||
metadatas = [doc.metadata for doc in documents]
|
metadatas = [doc.metadata for doc in documents]
|
||||||
return await self.aadd_texts(texts, metadatas, **kwargs)
|
return await self.aadd_texts(texts, metadatas, **kwargs)
|
||||||
|
|
||||||
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
|
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
|
||||||
"""Return docs most similar to query using specified search type."""
|
"""Return docs most similar to query using specified search type.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: Input text.
|
||||||
|
search_type: Type of search to perform. Can be "similarity",
|
||||||
|
"mmr", or "similarity_score_threshold".
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
"""
|
||||||
if search_type == "similarity":
|
if search_type == "similarity":
|
||||||
return self.similarity_search(query, **kwargs)
|
return self.similarity_search(query, **kwargs)
|
||||||
elif search_type == "similarity_score_threshold":
|
elif search_type == "similarity_score_threshold":
|
||||||
@ -172,7 +188,14 @@ class VectorStore(ABC):
|
|||||||
async def asearch(
|
async def asearch(
|
||||||
self, query: str, search_type: str, **kwargs: Any
|
self, query: str, search_type: str, **kwargs: Any
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs most similar to query using specified search type."""
|
"""Return docs most similar to query using specified search type.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: Input text.
|
||||||
|
search_type: Type of search to perform. Can be "similarity",
|
||||||
|
"mmr", or "similarity_score_threshold".
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
"""
|
||||||
if search_type == "similarity":
|
if search_type == "similarity":
|
||||||
return await self.asimilarity_search(query, **kwargs)
|
return await self.asimilarity_search(query, **kwargs)
|
||||||
elif search_type == "similarity_score_threshold":
|
elif search_type == "similarity_score_threshold":
|
||||||
@ -192,7 +215,15 @@ class VectorStore(ABC):
|
|||||||
def similarity_search(
|
def similarity_search(
|
||||||
self, query: str, k: int = 4, **kwargs: Any
|
self, query: str, k: int = 4, **kwargs: Any
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs most similar to query."""
|
"""Return docs most similar to query.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: Input text.
|
||||||
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Documents most similar to the query.
|
||||||
|
"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _euclidean_relevance_score_fn(distance: float) -> float:
|
def _euclidean_relevance_score_fn(distance: float) -> float:
|
||||||
@ -239,13 +270,21 @@ class VectorStore(ABC):
|
|||||||
def similarity_search_with_score(
|
def similarity_search_with_score(
|
||||||
self, *args: Any, **kwargs: Any
|
self, *args: Any, **kwargs: Any
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
"""Run similarity search with distance."""
|
"""Run similarity search with distance.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Tuples of (doc, similarity_score).
|
||||||
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
async def asimilarity_search_with_score(
|
async def asimilarity_search_with_score(
|
||||||
self, *args: Any, **kwargs: Any
|
self, *args: Any, **kwargs: Any
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
"""Run similarity search with distance asynchronously."""
|
"""Run similarity search with distance.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Tuples of (doc, similarity_score).
|
||||||
|
"""
|
||||||
|
|
||||||
# This is a temporary workaround to make the similarity search
|
# This is a temporary workaround to make the similarity search
|
||||||
# asynchronous. The proper solution is to make the similarity search
|
# asynchronous. The proper solution is to make the similarity search
|
||||||
@ -268,7 +307,7 @@ class VectorStore(ABC):
|
|||||||
0 is dissimilar, 1 is most similar.
|
0 is dissimilar, 1 is most similar.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: input text
|
query: Input text.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||||
@ -288,14 +327,14 @@ class VectorStore(ABC):
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
"""
|
"""
|
||||||
Default async similarity search with relevance scores. Modify if necessary
|
Default similarity search with relevance scores. Modify if necessary
|
||||||
in subclass.
|
in subclass.
|
||||||
Return docs and relevance scores in the range [0, 1].
|
Return docs and relevance scores in the range [0, 1].
|
||||||
|
|
||||||
0 is dissimilar, 1 is most similar.
|
0 is dissimilar, 1 is most similar.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: input text
|
query: Input text.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||||
@ -319,7 +358,7 @@ class VectorStore(ABC):
|
|||||||
0 is dissimilar, 1 is most similar.
|
0 is dissimilar, 1 is most similar.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: input text
|
query: Input text.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||||
@ -361,12 +400,12 @@ class VectorStore(ABC):
|
|||||||
k: int = 4,
|
k: int = 4,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
"""Return docs and relevance scores in the range [0, 1], asynchronously.
|
"""Return docs and relevance scores in the range [0, 1].
|
||||||
|
|
||||||
0 is dissimilar, 1 is most similar.
|
0 is dissimilar, 1 is most similar.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: input text
|
query: Input text.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||||
@ -405,7 +444,15 @@ class VectorStore(ABC):
|
|||||||
async def asimilarity_search(
|
async def asimilarity_search(
|
||||||
self, query: str, k: int = 4, **kwargs: Any
|
self, query: str, k: int = 4, **kwargs: Any
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs most similar to query."""
|
"""Return docs most similar to query.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: Input text.
|
||||||
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Documents most similar to the query.
|
||||||
|
"""
|
||||||
|
|
||||||
# This is a temporary workaround to make the similarity search
|
# This is a temporary workaround to make the similarity search
|
||||||
# asynchronous. The proper solution is to make the similarity search
|
# asynchronous. The proper solution is to make the similarity search
|
||||||
@ -429,7 +476,15 @@ class VectorStore(ABC):
|
|||||||
async def asimilarity_search_by_vector(
|
async def asimilarity_search_by_vector(
|
||||||
self, embedding: List[float], k: int = 4, **kwargs: Any
|
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs most similar to embedding vector."""
|
"""Return docs most similar to embedding vector.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
embedding: Embedding to look up documents similar to.
|
||||||
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Documents most similar to the query vector.
|
||||||
|
"""
|
||||||
|
|
||||||
# This is a temporary workaround to make the similarity search
|
# This is a temporary workaround to make the similarity search
|
||||||
# asynchronous. The proper solution is to make the similarity search
|
# asynchronous. The proper solution is to make the similarity search
|
||||||
@ -536,7 +591,22 @@ class VectorStore(ABC):
|
|||||||
lambda_mult: float = 0.5,
|
lambda_mult: float = 0.5,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs selected using the maximal marginal relevance."""
|
"""Return docs selected using the maximal marginal relevance.
|
||||||
|
|
||||||
|
Maximal marginal relevance optimizes for similarity to query AND diversity
|
||||||
|
among selected documents.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
embedding: Embedding to look up documents similar to.
|
||||||
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||||
|
lambda_mult: Number between 0 and 1 that determines the degree
|
||||||
|
of diversity among the results with 0 corresponding
|
||||||
|
to maximum diversity and 1 to minimum diversity.
|
||||||
|
Defaults to 0.5.
|
||||||
|
Returns:
|
||||||
|
List of Documents selected by maximal marginal relevance.
|
||||||
|
"""
|
||||||
return await run_in_executor(
|
return await run_in_executor(
|
||||||
None,
|
None,
|
||||||
self.max_marginal_relevance_search_by_vector,
|
self.max_marginal_relevance_search_by_vector,
|
||||||
@ -554,7 +624,12 @@ class VectorStore(ABC):
|
|||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> VST:
|
) -> VST:
|
||||||
"""Return VectorStore initialized from documents and embeddings."""
|
"""Return VectorStore initialized from documents and embeddings.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
documents: List of Documents to add to the vectorstore.
|
||||||
|
embedding: Embedding function to use.
|
||||||
|
"""
|
||||||
texts = [d.page_content for d in documents]
|
texts = [d.page_content for d in documents]
|
||||||
metadatas = [d.metadata for d in documents]
|
metadatas = [d.metadata for d in documents]
|
||||||
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
|
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
|
||||||
@ -566,7 +641,12 @@ class VectorStore(ABC):
|
|||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> VST:
|
) -> VST:
|
||||||
"""Return VectorStore initialized from documents and embeddings."""
|
"""Return VectorStore initialized from documents and embeddings.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
documents: List of Documents to add to the vectorstore.
|
||||||
|
embedding: Embedding function to use.
|
||||||
|
"""
|
||||||
texts = [d.page_content for d in documents]
|
texts = [d.page_content for d in documents]
|
||||||
metadatas = [d.metadata for d in documents]
|
metadatas = [d.metadata for d in documents]
|
||||||
return await cls.afrom_texts(texts, embedding, metadatas=metadatas, **kwargs)
|
return await cls.afrom_texts(texts, embedding, metadatas=metadatas, **kwargs)
|
||||||
@ -580,7 +660,13 @@ class VectorStore(ABC):
|
|||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> VST:
|
) -> VST:
|
||||||
"""Return VectorStore initialized from texts and embeddings."""
|
"""Return VectorStore initialized from texts and embeddings.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
texts: Texts to add to the vectorstore.
|
||||||
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
|
embedding: Embedding function to use.
|
||||||
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
async def afrom_texts(
|
async def afrom_texts(
|
||||||
@ -590,7 +676,13 @@ class VectorStore(ABC):
|
|||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> VST:
|
) -> VST:
|
||||||
"""Return VectorStore initialized from texts and embeddings."""
|
"""Return VectorStore initialized from texts and embeddings.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
texts: Texts to add to the vectorstore.
|
||||||
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
|
embedding: Embedding function to use.
|
||||||
|
"""
|
||||||
return await run_in_executor(
|
return await run_in_executor(
|
||||||
None, cls.from_texts, texts, embedding, metadatas, **kwargs
|
None, cls.from_texts, texts, embedding, metadatas, **kwargs
|
||||||
)
|
)
|
||||||
@ -741,11 +833,25 @@ class VectorStoreRetriever(BaseRetriever):
|
|||||||
return docs
|
return docs
|
||||||
|
|
||||||
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
|
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
|
||||||
"""Add documents to vectorstore."""
|
"""Add documents to the vectorstore.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
documents: Documents to add to the vectorstore.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of IDs of the added texts.
|
||||||
|
"""
|
||||||
return self.vectorstore.add_documents(documents, **kwargs)
|
return self.vectorstore.add_documents(documents, **kwargs)
|
||||||
|
|
||||||
async def aadd_documents(
|
async def aadd_documents(
|
||||||
self, documents: List[Document], **kwargs: Any
|
self, documents: List[Document], **kwargs: Any
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""Add documents to vectorstore."""
|
"""Add documents to the vectorstore.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
documents: Documents to add to the vectorstore.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of IDs of the added texts.
|
||||||
|
"""
|
||||||
return await self.vectorstore.aadd_documents(documents, **kwargs)
|
return await self.vectorstore.aadd_documents(documents, **kwargs)
|
||||||
|
Loading…
Reference in New Issue
Block a user