mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-06 05:08:20 +00:00
core: docstrings vectorstores
update (#24281)
Added missing docstrings. Formatted docstrings to a consistent form. --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
1e9cc02ed8
commit
5ccf8ebfac
@ -91,6 +91,10 @@ class VectorStore(ABC):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of ids from adding the texts into the vectorstore.
|
List of ids from adding the texts into the vectorstore.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the number of metadatas does not match the number of texts.
|
||||||
|
ValueError: If the number of ids does not match the number of texts.
|
||||||
"""
|
"""
|
||||||
if type(self).upsert != VectorStore.upsert:
|
if type(self).upsert != VectorStore.upsert:
|
||||||
# Import document in local scope to avoid circular imports
|
# Import document in local scope to avoid circular imports
|
||||||
@ -145,7 +149,12 @@ class VectorStore(ABC):
|
|||||||
kwargs should only include parameters that are common to all
|
kwargs should only include parameters that are common to all
|
||||||
documents. (e.g., timeout for indexing, retry policy, etc.)
|
documents. (e.g., timeout for indexing, retry policy, etc.)
|
||||||
kwargs should not include ids to avoid ambiguous semantics.
|
kwargs should not include ids to avoid ambiguous semantics.
|
||||||
Instead the ID should be provided as part of the Document object.
|
Instead, the ID should be provided as part of the Document object.
|
||||||
|
|
||||||
|
Yields:
|
||||||
|
UpsertResponse: A response object that contains the list of IDs that were
|
||||||
|
successfully added or updated in the vectorstore and the list of IDs that
|
||||||
|
failed to be added or updated.
|
||||||
|
|
||||||
.. versionadded:: 0.2.11
|
.. versionadded:: 0.2.11
|
||||||
"""
|
"""
|
||||||
@ -244,6 +253,11 @@ class VectorStore(ABC):
|
|||||||
kwargs should not include ids to avoid ambiguous semantics.
|
kwargs should not include ids to avoid ambiguous semantics.
|
||||||
Instead the ID should be provided as part of the Document object.
|
Instead, the ID should be provided as part of the Document object.
|
||||||
|
|
||||||
|
Yields:
|
||||||
|
UpsertResponse: A response object that contains the list of IDs that were
|
||||||
|
successfully added or updated in the vectorstore and the list of IDs that
|
||||||
|
failed to be added or updated.
|
||||||
|
|
||||||
.. versionadded:: 0.2.11
|
.. versionadded:: 0.2.11
|
||||||
"""
|
"""
|
||||||
async for batch in abatch_iterate(batch_size, items):
|
async for batch in abatch_iterate(batch_size, items):
|
||||||
@ -292,7 +306,7 @@ class VectorStore(ABC):
|
|||||||
"""Delete by vector ID or other criteria.
|
"""Delete by vector ID or other criteria.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
ids: List of ids to delete.
|
ids: List of ids to delete. If None, delete all. Default is None.
|
||||||
**kwargs: Other keyword arguments that subclasses might use.
|
**kwargs: Other keyword arguments that subclasses might use.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@ -332,7 +346,7 @@ class VectorStore(ABC):
|
|||||||
|
|
||||||
# Implementations should override this method to provide an async native version.
|
# Implementations should override this method to provide an async native version.
|
||||||
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
||||||
"""Get documents by their IDs.
|
"""Async get documents by their IDs.
|
||||||
|
|
||||||
The returned documents are expected to have the ID field set to the ID of the
|
The returned documents are expected to have the ID field set to the ID of the
|
||||||
document in the vector store.
|
document in the vector store.
|
||||||
@ -360,10 +374,10 @@ class VectorStore(ABC):
|
|||||||
async def adelete(
|
async def adelete(
|
||||||
self, ids: Optional[List[str]] = None, **kwargs: Any
|
self, ids: Optional[List[str]] = None, **kwargs: Any
|
||||||
) -> Optional[bool]:
|
) -> Optional[bool]:
|
||||||
"""Delete by vector ID or other criteria.
|
"""Async delete by vector ID or other criteria.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
ids: List of ids to delete.
|
ids: List of ids to delete. If None, delete all. Default is None.
|
||||||
**kwargs: Other keyword arguments that subclasses might use.
|
**kwargs: Other keyword arguments that subclasses might use.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@ -378,15 +392,20 @@ class VectorStore(ABC):
|
|||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""Run more texts through the embeddings and add to the vectorstore.
|
"""Async run more texts through the embeddings and add to the vectorstore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
texts: Iterable of strings to add to the vectorstore.
|
texts: Iterable of strings to add to the vectorstore.
|
||||||
metadatas: Optional list of metadatas associated with the texts.
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
|
Default is None.
|
||||||
**kwargs: vectorstore specific parameters.
|
**kwargs: vectorstore specific parameters.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of ids from adding the texts into the vectorstore.
|
List of ids from adding the texts into the vectorstore.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the number of metadatas does not match the number of texts.
|
||||||
|
ValueError: If the number of ids does not match the number of texts.
|
||||||
"""
|
"""
|
||||||
if type(self).aupsert != VectorStore.aupsert:
|
if type(self).aupsert != VectorStore.aupsert:
|
||||||
# Import document in local scope to avoid circular imports
|
# Import document in local scope to avoid circular imports
|
||||||
@ -435,6 +454,9 @@ class VectorStore(ABC):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of IDs of the added texts.
|
List of IDs of the added texts.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the number of ids does not match the number of documents.
|
||||||
"""
|
"""
|
||||||
if type(self).upsert != VectorStore.upsert:
|
if type(self).upsert != VectorStore.upsert:
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
@ -471,13 +493,18 @@ class VectorStore(ABC):
|
|||||||
async def aadd_documents(
|
async def aadd_documents(
|
||||||
self, documents: List[Document], **kwargs: Any
|
self, documents: List[Document], **kwargs: Any
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""Run more documents through the embeddings and add to the vectorstore.
|
"""Async run more documents through the embeddings and add to
|
||||||
|
the vectorstore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
documents: Documents to add to the vectorstore.
|
documents: Documents to add to the vectorstore.
|
||||||
|
**kwargs: Additional keyword arguments.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of IDs of the added texts.
|
List of IDs of the added texts.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the number of IDs does not match the number of documents.
|
||||||
"""
|
"""
|
||||||
# If either upsert or aupsert has been implemented, we delegate to them!
|
# If either upsert or aupsert has been implemented, we delegate to them!
|
||||||
if (
|
if (
|
||||||
@ -516,13 +543,20 @@ class VectorStore(ABC):
|
|||||||
return await self.aadd_texts(texts, metadatas, **kwargs)
|
return await self.aadd_texts(texts, metadatas, **kwargs)
|
||||||
|
|
||||||
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
|
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
|
||||||
"""Return docs most similar to query using specified search type.
|
"""Return docs most similar to query using a specified search type.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: Input text
|
query: Input text
|
||||||
search_type: Type of search to perform. Can be "similarity",
|
search_type: Type of search to perform. Can be "similarity",
|
||||||
"mmr", or "similarity_score_threshold".
|
"mmr", or "similarity_score_threshold".
|
||||||
**kwargs: Arguments to pass to the search method.
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Documents most similar to the query.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If search_type is not one of "similarity",
|
||||||
|
"mmr", or "similarity_score_threshold".
|
||||||
"""
|
"""
|
||||||
if search_type == "similarity":
|
if search_type == "similarity":
|
||||||
return self.similarity_search(query, **kwargs)
|
return self.similarity_search(query, **kwargs)
|
||||||
@ -536,19 +570,27 @@ class VectorStore(ABC):
|
|||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"search_type of {search_type} not allowed. Expected "
|
f"search_type of {search_type} not allowed. Expected "
|
||||||
"search_type to be 'similarity', 'similarity_score_threshold' or 'mmr'."
|
"search_type to be 'similarity', 'similarity_score_threshold'"
|
||||||
|
" or 'mmr'."
|
||||||
)
|
)
|
||||||
|
|
||||||
async def asearch(
|
async def asearch(
|
||||||
self, query: str, search_type: str, **kwargs: Any
|
self, query: str, search_type: str, **kwargs: Any
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs most similar to query using specified search type.
|
"""Async return docs most similar to query using a specified search type.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: Input text.
|
query: Input text.
|
||||||
search_type: Type of search to perform. Can be "similarity",
|
search_type: Type of search to perform. Can be "similarity",
|
||||||
"mmr", or "similarity_score_threshold".
|
"mmr", or "similarity_score_threshold".
|
||||||
**kwargs: Arguments to pass to the search method.
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Documents most similar to the query.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If search_type is not one of "similarity",
|
||||||
|
"mmr", or "similarity_score_threshold".
|
||||||
"""
|
"""
|
||||||
if search_type == "similarity":
|
if search_type == "similarity":
|
||||||
return await self.asimilarity_search(query, **kwargs)
|
return await self.asimilarity_search(query, **kwargs)
|
||||||
@ -574,6 +616,7 @@ class VectorStore(ABC):
|
|||||||
Args:
|
Args:
|
||||||
query: Input text.
|
query: Input text.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query.
|
List of Documents most similar to the query.
|
||||||
@ -589,7 +632,7 @@ class VectorStore(ABC):
|
|||||||
# others are not!)
|
# others are not!)
|
||||||
# - embedding dimensionality
|
# - embedding dimensionality
|
||||||
# - etc.
|
# - etc.
|
||||||
# This function converts the euclidean norm of normalized embeddings
|
# This function converts the Euclidean norm of normalized embeddings
|
||||||
# (0 is most similar, sqrt(2) most dissimilar)
|
# (0 is most similar, sqrt(2) most dissimilar)
|
||||||
# to a similarity function (0 to 1)
|
# to a similarity function (0 to 1)
|
||||||
return 1.0 - distance / math.sqrt(2)
|
return 1.0 - distance / math.sqrt(2)
|
||||||
@ -617,7 +660,7 @@ class VectorStore(ABC):
|
|||||||
- embedding dimensionality
|
- embedding dimensionality
|
||||||
- etc.
|
- etc.
|
||||||
|
|
||||||
Vectorstores should define their own selection based method of relevance.
|
Vectorstores should define their own selection-based method of relevance.
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@ -626,18 +669,26 @@ class VectorStore(ABC):
|
|||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
"""Run similarity search with distance.
|
"""Run similarity search with distance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
*args: Arguments to pass to the search method.
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Tuples of (doc, similarity_score)
|
List of Tuples of (doc, similarity_score).
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
async def asimilarity_search_with_score(
|
async def asimilarity_search_with_score(
|
||||||
self, *args: Any, **kwargs: Any
|
self, *args: Any, **kwargs: Any
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
"""Run similarity search with distance.
|
"""Async run similarity search with distance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
*args: Arguments to pass to the search method.
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Tuples of (doc, similarity_score)
|
List of Tuples of (doc, similarity_score).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# This is a temporary workaround to make the similarity search
|
# This is a temporary workaround to make the similarity search
|
||||||
@ -716,10 +767,10 @@ class VectorStore(ABC):
|
|||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||||
filter the resulting set of retrieved docs
|
filter the resulting set of retrieved docs.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Tuples of (doc, similarity_score)
|
List of Tuples of (doc, similarity_score).
|
||||||
"""
|
"""
|
||||||
score_threshold = kwargs.pop("score_threshold", None)
|
score_threshold = kwargs.pop("score_threshold", None)
|
||||||
|
|
||||||
@ -754,7 +805,7 @@ class VectorStore(ABC):
|
|||||||
k: int = 4,
|
k: int = 4,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Tuple[Document, float]]:
|
) -> List[Tuple[Document, float]]:
|
||||||
"""Return docs and relevance scores in the range [0, 1].
|
"""Async return docs and relevance scores in the range [0, 1].
|
||||||
|
|
||||||
0 is dissimilar, 1 is most similar.
|
0 is dissimilar, 1 is most similar.
|
||||||
|
|
||||||
@ -798,11 +849,12 @@ class VectorStore(ABC):
|
|||||||
async def asimilarity_search(
|
async def asimilarity_search(
|
||||||
self, query: str, k: int = 4, **kwargs: Any
|
self, query: str, k: int = 4, **kwargs: Any
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs most similar to query.
|
"""Async return docs most similar to query.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: Input text.
|
query: Input text.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query.
|
List of Documents most similar to the query.
|
||||||
@ -821,6 +873,7 @@ class VectorStore(ABC):
|
|||||||
Args:
|
Args:
|
||||||
embedding: Embedding to look up documents similar to.
|
embedding: Embedding to look up documents similar to.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query vector.
|
List of Documents most similar to the query vector.
|
||||||
@ -830,11 +883,12 @@ class VectorStore(ABC):
|
|||||||
async def asimilarity_search_by_vector(
|
async def asimilarity_search_by_vector(
|
||||||
self, embedding: List[float], k: int = 4, **kwargs: Any
|
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs most similar to embedding vector.
|
"""Async return docs most similar to embedding vector.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
embedding: Embedding to look up documents similar to.
|
embedding: Embedding to look up documents similar to.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query vector.
|
List of Documents most similar to the query vector.
|
||||||
@ -864,10 +918,13 @@ class VectorStore(ABC):
|
|||||||
query: Text to look up documents similar to.
|
query: Text to look up documents similar to.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||||
|
Default is 20.
|
||||||
lambda_mult: Number between 0 and 1 that determines the degree
|
lambda_mult: Number between 0 and 1 that determines the degree
|
||||||
of diversity among the results with 0 corresponding
|
of diversity among the results with 0 corresponding
|
||||||
to maximum diversity and 1 to minimum diversity.
|
to maximum diversity and 1 to minimum diversity.
|
||||||
Defaults to 0.5.
|
Defaults to 0.5.
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents selected by maximal marginal relevance.
|
List of Documents selected by maximal marginal relevance.
|
||||||
"""
|
"""
|
||||||
@ -881,7 +938,7 @@ class VectorStore(ABC):
|
|||||||
lambda_mult: float = 0.5,
|
lambda_mult: float = 0.5,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs selected using the maximal marginal relevance.
|
"""Async return docs selected using the maximal marginal relevance.
|
||||||
|
|
||||||
Maximal marginal relevance optimizes for similarity to query AND diversity
|
Maximal marginal relevance optimizes for similarity to query AND diversity
|
||||||
among selected documents.
|
among selected documents.
|
||||||
@ -890,10 +947,12 @@ class VectorStore(ABC):
|
|||||||
query: Text to look up documents similar to.
|
query: Text to look up documents similar to.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||||
|
Default is 20.
|
||||||
lambda_mult: Number between 0 and 1 that determines the degree
|
lambda_mult: Number between 0 and 1 that determines the degree
|
||||||
of diversity among the results with 0 corresponding
|
of diversity among the results with 0 corresponding
|
||||||
to maximum diversity and 1 to minimum diversity.
|
to maximum diversity and 1 to minimum diversity.
|
||||||
Defaults to 0.5.
|
Defaults to 0.5.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents selected by maximal marginal relevance.
|
List of Documents selected by maximal marginal relevance.
|
||||||
"""
|
"""
|
||||||
@ -928,10 +987,13 @@ class VectorStore(ABC):
|
|||||||
embedding: Embedding to look up documents similar to.
|
embedding: Embedding to look up documents similar to.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||||
|
Default is 20.
|
||||||
lambda_mult: Number between 0 and 1 that determines the degree
|
lambda_mult: Number between 0 and 1 that determines the degree
|
||||||
of diversity among the results with 0 corresponding
|
of diversity among the results with 0 corresponding
|
||||||
to maximum diversity and 1 to minimum diversity.
|
to maximum diversity and 1 to minimum diversity.
|
||||||
Defaults to 0.5.
|
Defaults to 0.5.
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents selected by maximal marginal relevance.
|
List of Documents selected by maximal marginal relevance.
|
||||||
"""
|
"""
|
||||||
@ -945,7 +1007,7 @@ class VectorStore(ABC):
|
|||||||
lambda_mult: float = 0.5,
|
lambda_mult: float = 0.5,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs selected using the maximal marginal relevance.
|
"""Async return docs selected using the maximal marginal relevance.
|
||||||
|
|
||||||
Maximal marginal relevance optimizes for similarity to query AND diversity
|
Maximal marginal relevance optimizes for similarity to query AND diversity
|
||||||
among selected documents.
|
among selected documents.
|
||||||
@ -954,10 +1016,13 @@ class VectorStore(ABC):
|
|||||||
embedding: Embedding to look up documents similar to.
|
embedding: Embedding to look up documents similar to.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||||
|
Default is 20.
|
||||||
lambda_mult: Number between 0 and 1 that determines the degree
|
lambda_mult: Number between 0 and 1 that determines the degree
|
||||||
of diversity among the results with 0 corresponding
|
of diversity among the results with 0 corresponding
|
||||||
to maximum diversity and 1 to minimum diversity.
|
to maximum diversity and 1 to minimum diversity.
|
||||||
Defaults to 0.5.
|
Defaults to 0.5.
|
||||||
|
**kwargs: Arguments to pass to the search method.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents selected by maximal marginal relevance.
|
List of Documents selected by maximal marginal relevance.
|
||||||
"""
|
"""
|
||||||
@ -983,6 +1048,10 @@ class VectorStore(ABC):
|
|||||||
Args:
|
Args:
|
||||||
documents: List of Documents to add to the vectorstore.
|
documents: List of Documents to add to the vectorstore.
|
||||||
embedding: Embedding function to use.
|
embedding: Embedding function to use.
|
||||||
|
**kwargs: Additional keyword arguments.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
VectorStore: VectorStore initialized from documents and embeddings.
|
||||||
"""
|
"""
|
||||||
texts = [d.page_content for d in documents]
|
texts = [d.page_content for d in documents]
|
||||||
metadatas = [d.metadata for d in documents]
|
metadatas = [d.metadata for d in documents]
|
||||||
@ -995,11 +1064,15 @@ class VectorStore(ABC):
|
|||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> VST:
|
) -> VST:
|
||||||
"""Return VectorStore initialized from documents and embeddings.
|
"""Async return VectorStore initialized from documents and embeddings.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
documents: List of Documents to add to the vectorstore.
|
documents: List of Documents to add to the vectorstore.
|
||||||
embedding: Embedding function to use.
|
embedding: Embedding function to use.
|
||||||
|
**kwargs: Additional keyword arguments.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
VectorStore: VectorStore initialized from documents and embeddings.
|
||||||
"""
|
"""
|
||||||
texts = [d.page_content for d in documents]
|
texts = [d.page_content for d in documents]
|
||||||
metadatas = [d.metadata for d in documents]
|
metadatas = [d.metadata for d in documents]
|
||||||
@ -1018,8 +1091,13 @@ class VectorStore(ABC):
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
texts: Texts to add to the vectorstore.
|
texts: Texts to add to the vectorstore.
|
||||||
metadatas: Optional list of metadatas associated with the texts.
|
|
||||||
embedding: Embedding function to use.
|
embedding: Embedding function to use.
|
||||||
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
|
Default is None.
|
||||||
|
**kwargs: Additional keyword arguments.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
VectorStore: VectorStore initialized from texts and embeddings.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -1030,12 +1108,17 @@ class VectorStore(ABC):
|
|||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> VST:
|
) -> VST:
|
||||||
"""Return VectorStore initialized from texts and embeddings.
|
"""Async return VectorStore initialized from texts and embeddings.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
texts: Texts to add to the vectorstore.
|
texts: Texts to add to the vectorstore.
|
||||||
metadatas: Optional list of metadatas associated with the texts.
|
|
||||||
embedding: Embedding function to use.
|
embedding: Embedding function to use.
|
||||||
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
|
Default is None.
|
||||||
|
**kwargs: Additional keyword arguments.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
VectorStore: VectorStore initialized from texts and embeddings.
|
||||||
"""
|
"""
|
||||||
return await run_in_executor(
|
return await run_in_executor(
|
||||||
None, cls.from_texts, texts, embedding, metadatas, **kwargs
|
None, cls.from_texts, texts, embedding, metadatas, **kwargs
|
||||||
@ -1052,6 +1135,8 @@ class VectorStore(ABC):
|
|||||||
"""Return VectorStoreRetriever initialized from this VectorStore.
|
"""Return VectorStoreRetriever initialized from this VectorStore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
**kwargs: Keyword arguments to pass to the search function.
|
||||||
|
Can include:
|
||||||
search_type (Optional[str]): Defines the type of search that
|
search_type (Optional[str]): Defines the type of search that
|
||||||
the Retriever should perform.
|
the Retriever should perform.
|
||||||
Can be "similarity" (default), "mmr", or
|
Can be "similarity" (default), "mmr", or
|
||||||
@ -1061,7 +1146,8 @@ class VectorStore(ABC):
|
|||||||
k: Amount of documents to return (Default: 4)
|
k: Amount of documents to return (Default: 4)
|
||||||
score_threshold: Minimum relevance threshold
|
score_threshold: Minimum relevance threshold
|
||||||
for similarity_score_threshold
|
for similarity_score_threshold
|
||||||
fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
|
fetch_k: Amount of documents to pass to MMR algorithm
|
||||||
|
(Default: 20)
|
||||||
lambda_mult: Diversity of results returned by MMR;
|
lambda_mult: Diversity of results returned by MMR;
|
||||||
1 for minimum diversity and 0 for maximum. (Default: 0.5)
|
1 for minimum diversity and 0 for maximum. (Default: 0.5)
|
||||||
filter: Filter by document metadata
|
filter: Filter by document metadata
|
||||||
@ -1128,7 +1214,18 @@ class VectorStoreRetriever(BaseRetriever):
|
|||||||
|
|
||||||
@root_validator(pre=True)
|
@root_validator(pre=True)
|
||||||
def validate_search_type(cls, values: Dict) -> Dict:
|
def validate_search_type(cls, values: Dict) -> Dict:
|
||||||
"""Validate search type."""
|
"""Validate search type.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
values: Values to validate.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Values: Validated values.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If search_type is not one of the allowed search types.
|
||||||
|
ValueError: If score_threshold is not specified with a float value (0~1).
|
||||||
|
"""
|
||||||
search_type = values.get("search_type", "similarity")
|
search_type = values.get("search_type", "similarity")
|
||||||
if search_type not in cls.allowed_search_types:
|
if search_type not in cls.allowed_search_types:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@ -1191,6 +1288,7 @@ class VectorStoreRetriever(BaseRetriever):
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
documents: Documents to add to the vectorstore.
|
documents: Documents to add to the vectorstore.
|
||||||
|
**kwargs: Other keyword arguments that subclasses might use.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of IDs of the added texts.
|
List of IDs of the added texts.
|
||||||
@ -1200,10 +1298,11 @@ class VectorStoreRetriever(BaseRetriever):
|
|||||||
async def aadd_documents(
|
async def aadd_documents(
|
||||||
self, documents: List[Document], **kwargs: Any
|
self, documents: List[Document], **kwargs: Any
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""Add documents to the vectorstore.
|
"""Async add documents to the vectorstore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
documents: Documents to add to the vectorstore.
|
documents: Documents to add to the vectorstore.
|
||||||
|
**kwargs: Other keyword arguments that subclasses might use.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of IDs of the added texts.
|
List of IDs of the added texts.
|
||||||
|
@ -32,13 +32,14 @@ class InMemoryVectorStore(VectorStore):
|
|||||||
"""In-memory implementation of VectorStore using a dictionary.
|
"""In-memory implementation of VectorStore using a dictionary.
|
||||||
|
|
||||||
Uses numpy to compute cosine similarity for search.
|
Uses numpy to compute cosine similarity for search.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, embedding: Embeddings) -> None:
|
||||||
|
"""Initialize with the given embedding function.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
embedding: embedding function to use.
|
embedding: embedding function to use.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, embedding: Embeddings) -> None:
|
|
||||||
"""Initialize with the given embedding function."""
|
|
||||||
# TODO: would be nice to change to
|
# TODO: would be nice to change to
|
||||||
# Dict[str, Document] at some point (will be a breaking change)
|
# Dict[str, Document] at some point (will be a breaking change)
|
||||||
self.store: Dict[str, Dict[str, Any]] = {}
|
self.store: Dict[str, Dict[str, Any]] = {}
|
||||||
@ -74,7 +75,14 @@ class InMemoryVectorStore(VectorStore):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
||||||
"""Get documents by their ids."""
|
"""Get documents by their ids.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ids: The ids of the documents to get.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of Document objects.
|
||||||
|
"""
|
||||||
documents = []
|
documents = []
|
||||||
|
|
||||||
for doc_id in ids:
|
for doc_id in ids:
|
||||||
@ -90,6 +98,14 @@ class InMemoryVectorStore(VectorStore):
|
|||||||
return documents
|
return documents
|
||||||
|
|
||||||
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
||||||
|
"""Async get documents by their ids.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ids: The ids of the documents to get.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of Document objects.
|
||||||
|
"""
|
||||||
return self.get_by_ids(ids)
|
return self.get_by_ids(ids)
|
||||||
|
|
||||||
async def aadd_texts(
|
async def aadd_texts(
|
||||||
@ -261,6 +277,16 @@ class InMemoryVectorStore(VectorStore):
|
|||||||
def load(
|
def load(
|
||||||
cls, path: str, embedding: Embeddings, **kwargs: Any
|
cls, path: str, embedding: Embeddings, **kwargs: Any
|
||||||
) -> "InMemoryVectorStore":
|
) -> "InMemoryVectorStore":
|
||||||
|
"""Load a vector store from a file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: The path to load the vector store from.
|
||||||
|
embedding: The embedding to use.
|
||||||
|
**kwargs: Additional arguments to pass to the constructor.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A VectorStore object.
|
||||||
|
"""
|
||||||
_path: Path = Path(path)
|
_path: Path = Path(path)
|
||||||
with _path.open("r") as f:
|
with _path.open("r") as f:
|
||||||
store = load(json.load(f))
|
store = load(json.load(f))
|
||||||
@ -269,6 +295,11 @@ class InMemoryVectorStore(VectorStore):
|
|||||||
return vectorstore
|
return vectorstore
|
||||||
|
|
||||||
def dump(self, path: str) -> None:
|
def dump(self, path: str) -> None:
|
||||||
|
"""Dump the vector store to a file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: The path to dump the vector store to.
|
||||||
|
"""
|
||||||
_path: Path = Path(path)
|
_path: Path = Path(path)
|
||||||
_path.parent.mkdir(exist_ok=True, parents=True)
|
_path.parent.mkdir(exist_ok=True, parents=True)
|
||||||
with _path.open("w") as f:
|
with _path.open("w") as f:
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
"""Internal utilities for the in memory implementation of VectorStore.
|
"""Internal utilities for the in memory implementation of VectorStore.
|
||||||
|
|
||||||
These are part of a private API and users should not used them directly
|
These are part of a private API, and users should not use them directly
|
||||||
as they can change without notice.
|
as they can change without notice.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -18,7 +18,20 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def _cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
|
def _cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
|
||||||
"""Row-wise cosine similarity between two equal-width matrices."""
|
"""Row-wise cosine similarity between two equal-width matrices.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
X: A matrix of shape (n, m).
|
||||||
|
Y: A matrix of shape (k, m).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A matrix of shape (n, k) where each element (i, j) is the cosine similarity
|
||||||
|
between the ith row of X and the jth row of Y.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the number of columns in X and Y are not the same.
|
||||||
|
ImportError: If numpy is not installed.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -64,7 +77,20 @@ def _maximal_marginal_relevance(
|
|||||||
lambda_mult: float = 0.5,
|
lambda_mult: float = 0.5,
|
||||||
k: int = 4,
|
k: int = 4,
|
||||||
) -> List[int]:
|
) -> List[int]:
|
||||||
"""Calculate maximal marginal relevance."""
|
"""Calculate maximal marginal relevance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query_embedding: The query embedding.
|
||||||
|
embedding_list: A list of embeddings.
|
||||||
|
lambda_mult: The lambda parameter for MMR. Default is 0.5.
|
||||||
|
k: The number of embeddings to return. Default is 4.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of indices of the embeddings to return.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ImportError: If numpy is not installed.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
Loading…
Reference in New Issue
Block a user