core: docstrings vectorstores update (#24281)

Added missing docstrings. Formatted docstrings to a consistent form.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Leonid Ganeline 2024-07-16 09:58:11 -07:00 committed by GitHub
parent 1e9cc02ed8
commit 5ccf8ebfac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 217 additions and 61 deletions

View File

@ -91,6 +91,10 @@ class VectorStore(ABC):
Returns: Returns:
List of ids from adding the texts into the vectorstore. List of ids from adding the texts into the vectorstore.
Raises:
ValueError: If the number of metadatas does not match the number of texts.
ValueError: If the number of ids does not match the number of texts.
""" """
if type(self).upsert != VectorStore.upsert: if type(self).upsert != VectorStore.upsert:
# Import document in local scope to avoid circular imports # Import document in local scope to avoid circular imports
@ -145,7 +149,12 @@ class VectorStore(ABC):
kwargs should only include parameters that are common to all kwargs should only include parameters that are common to all
documents. (e.g., timeout for indexing, retry policy, etc.) documents. (e.g., timeout for indexing, retry policy, etc.)
kwargs should not include ids to avoid ambiguous semantics. kwargs should not include ids to avoid ambiguous semantics.
Instead the ID should be provided as part of the Document object. Instead, the ID should be provided as part of the Document object.
Yields:
UpsertResponse: A response object that contains the list of IDs that were
successfully added or updated in the vectorstore and the list of IDs that
failed to be added or updated.
.. versionadded:: 0.2.11 .. versionadded:: 0.2.11
""" """
@ -244,6 +253,11 @@ class VectorStore(ABC):
kwargs should not include ids to avoid ambiguous semantics. kwargs should not include ids to avoid ambiguous semantics.
Instead the ID should be provided as part of the Document object. Instead the ID should be provided as part of the Document object.
Yields:
UpsertResponse: A response object that contains the list of IDs that were
successfully added or updated in the vectorstore and the list of IDs that
failed to be added or updated.
.. versionadded:: 0.2.11 .. versionadded:: 0.2.11
""" """
async for batch in abatch_iterate(batch_size, items): async for batch in abatch_iterate(batch_size, items):
@ -292,7 +306,7 @@ class VectorStore(ABC):
"""Delete by vector ID or other criteria. """Delete by vector ID or other criteria.
Args: Args:
ids: List of ids to delete. ids: List of ids to delete. If None, delete all. Default is None.
**kwargs: Other keyword arguments that subclasses might use. **kwargs: Other keyword arguments that subclasses might use.
Returns: Returns:
@ -332,7 +346,7 @@ class VectorStore(ABC):
# Implementations should override this method to provide an async native version. # Implementations should override this method to provide an async native version.
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]: async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
"""Get documents by their IDs. """Async get documents by their IDs.
The returned documents are expected to have the ID field set to the ID of the The returned documents are expected to have the ID field set to the ID of the
document in the vector store. document in the vector store.
@ -360,10 +374,10 @@ class VectorStore(ABC):
async def adelete( async def adelete(
self, ids: Optional[List[str]] = None, **kwargs: Any self, ids: Optional[List[str]] = None, **kwargs: Any
) -> Optional[bool]: ) -> Optional[bool]:
"""Delete by vector ID or other criteria. """Async delete by vector ID or other criteria.
Args: Args:
ids: List of ids to delete. ids: List of ids to delete. If None, delete all. Default is None.
**kwargs: Other keyword arguments that subclasses might use. **kwargs: Other keyword arguments that subclasses might use.
Returns: Returns:
@ -378,15 +392,20 @@ class VectorStore(ABC):
metadatas: Optional[List[dict]] = None, metadatas: Optional[List[dict]] = None,
**kwargs: Any, **kwargs: Any,
) -> List[str]: ) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore. """Async run more texts through the embeddings and add to the vectorstore.
Args: Args:
texts: Iterable of strings to add to the vectorstore. texts: Iterable of strings to add to the vectorstore.
metadatas: Optional list of metadatas associated with the texts. metadatas: Optional list of metadatas associated with the texts.
Default is None.
**kwargs: vectorstore specific parameters. **kwargs: vectorstore specific parameters.
Returns: Returns:
List of ids from adding the texts into the vectorstore. List of ids from adding the texts into the vectorstore.
Raises:
ValueError: If the number of metadatas does not match the number of texts.
ValueError: If the number of ids does not match the number of texts.
""" """
if type(self).aupsert != VectorStore.aupsert: if type(self).aupsert != VectorStore.aupsert:
# Import document in local scope to avoid circular imports # Import document in local scope to avoid circular imports
@ -435,6 +454,9 @@ class VectorStore(ABC):
Returns: Returns:
List of IDs of the added texts. List of IDs of the added texts.
Raises:
ValueError: If the number of ids does not match the number of documents.
""" """
if type(self).upsert != VectorStore.upsert: if type(self).upsert != VectorStore.upsert:
from langchain_core.documents import Document from langchain_core.documents import Document
@ -471,13 +493,18 @@ class VectorStore(ABC):
async def aadd_documents( async def aadd_documents(
self, documents: List[Document], **kwargs: Any self, documents: List[Document], **kwargs: Any
) -> List[str]: ) -> List[str]:
"""Run more documents through the embeddings and add to the vectorstore. """Async run more documents through the embeddings and add to
the vectorstore.
Args: Args:
documents: Documents to add to the vectorstore. documents: Documents to add to the vectorstore.
kwargs: Additional keyword arguments.
Returns: Returns:
List of IDs of the added texts. List of IDs of the added texts.
Raises:
ValueError: If the number of IDs does not match the number of documents.
""" """
# If either upsert or aupsert has been implemented, we delegate to them! # If either upsert or aupsert has been implemented, we delegate to them!
if ( if (
@ -516,13 +543,20 @@ class VectorStore(ABC):
return await self.aadd_texts(texts, metadatas, **kwargs) return await self.aadd_texts(texts, metadatas, **kwargs)
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]: def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
"""Return docs most similar to query using specified search type. """Return docs most similar to query using a specified search type.
Args: Args:
query: Input text query: Input text
search_type: Type of search to perform. Can be "similarity", search_type: Type of search to perform. Can be "similarity",
"mmr", or "similarity_score_threshold". "mmr", or "similarity_score_threshold".
**kwargs: Arguments to pass to the search method. **kwargs: Arguments to pass to the search method.
Returns:
List of Documents most similar to the query.
Raises:
ValueError: If search_type is not one of "similarity",
"mmr", or "similarity_score_threshold".
""" """
if search_type == "similarity": if search_type == "similarity":
return self.similarity_search(query, **kwargs) return self.similarity_search(query, **kwargs)
@ -536,19 +570,27 @@ class VectorStore(ABC):
else: else:
raise ValueError( raise ValueError(
f"search_type of {search_type} not allowed. Expected " f"search_type of {search_type} not allowed. Expected "
"search_type to be 'similarity', 'similarity_score_threshold' or 'mmr'." "search_type to be 'similarity', 'similarity_score_threshold'"
" or 'mmr'."
) )
async def asearch( async def asearch(
self, query: str, search_type: str, **kwargs: Any self, query: str, search_type: str, **kwargs: Any
) -> List[Document]: ) -> List[Document]:
"""Return docs most similar to query using specified search type. """Async return docs most similar to query using a specified search type.
Args: Args:
query: Input text. query: Input text.
search_type: Type of search to perform. Can be "similarity", search_type: Type of search to perform. Can be "similarity",
"mmr", or "similarity_score_threshold". "mmr", or "similarity_score_threshold".
**kwargs: Arguments to pass to the search method. **kwargs: Arguments to pass to the search method.
Returns:
List of Documents most similar to the query.
Raises:
ValueError: If search_type is not one of "similarity",
"mmr", or "similarity_score_threshold".
""" """
if search_type == "similarity": if search_type == "similarity":
return await self.asimilarity_search(query, **kwargs) return await self.asimilarity_search(query, **kwargs)
@ -574,6 +616,7 @@ class VectorStore(ABC):
Args: Args:
query: Input text. query: Input text.
k: Number of Documents to return. Defaults to 4. k: Number of Documents to return. Defaults to 4.
**kwargs: Arguments to pass to the search method.
Returns: Returns:
List of Documents most similar to the query. List of Documents most similar to the query.
@ -589,7 +632,7 @@ class VectorStore(ABC):
# others are not!) # others are not!)
# - embedding dimensionality # - embedding dimensionality
# - etc. # - etc.
# This function converts the euclidean norm of normalized embeddings # This function converts the Euclidean norm of normalized embeddings
# (0 is most similar, sqrt(2) most dissimilar) # (0 is most similar, sqrt(2) most dissimilar)
# to a similarity function (0 to 1) # to a similarity function (0 to 1)
return 1.0 - distance / math.sqrt(2) return 1.0 - distance / math.sqrt(2)
@ -617,7 +660,7 @@ class VectorStore(ABC):
- embedding dimensionality - embedding dimensionality
- etc. - etc.
Vectorstores should define their own selection based method of relevance. Vectorstores should define their own selection-based method of relevance.
""" """
raise NotImplementedError raise NotImplementedError
@ -626,18 +669,26 @@ class VectorStore(ABC):
) -> List[Tuple[Document, float]]: ) -> List[Tuple[Document, float]]:
"""Run similarity search with distance. """Run similarity search with distance.
Args:
*args: Arguments to pass to the search method.
**kwargs: Arguments to pass to the search method.
Returns: Returns:
List of Tuples of (doc, similarity_score) List of Tuples of (doc, similarity_score).
""" """
raise NotImplementedError raise NotImplementedError
async def asimilarity_search_with_score( async def asimilarity_search_with_score(
self, *args: Any, **kwargs: Any self, *args: Any, **kwargs: Any
) -> List[Tuple[Document, float]]: ) -> List[Tuple[Document, float]]:
"""Run similarity search with distance. """Async run similarity search with distance.
Args:
*args: Arguments to pass to the search method.
**kwargs: Arguments to pass to the search method.
Returns: Returns:
List of Tuples of (doc, similarity_score) List of Tuples of (doc, similarity_score).
""" """
# This is a temporary workaround to make the similarity search # This is a temporary workaround to make the similarity search
@ -716,10 +767,10 @@ class VectorStore(ABC):
k: Number of Documents to return. Defaults to 4. k: Number of Documents to return. Defaults to 4.
**kwargs: kwargs to be passed to similarity search. Should include: **kwargs: kwargs to be passed to similarity search. Should include:
score_threshold: Optional, a floating point value between 0 to 1 to score_threshold: Optional, a floating point value between 0 to 1 to
filter the resulting set of retrieved docs filter the resulting set of retrieved docs.
Returns: Returns:
List of Tuples of (doc, similarity_score) List of Tuples of (doc, similarity_score).
""" """
score_threshold = kwargs.pop("score_threshold", None) score_threshold = kwargs.pop("score_threshold", None)
@ -754,7 +805,7 @@ class VectorStore(ABC):
k: int = 4, k: int = 4,
**kwargs: Any, **kwargs: Any,
) -> List[Tuple[Document, float]]: ) -> List[Tuple[Document, float]]:
"""Return docs and relevance scores in the range [0, 1]. """Async return docs and relevance scores in the range [0, 1].
0 is dissimilar, 1 is most similar. 0 is dissimilar, 1 is most similar.
@ -798,11 +849,12 @@ class VectorStore(ABC):
async def asimilarity_search( async def asimilarity_search(
self, query: str, k: int = 4, **kwargs: Any self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]: ) -> List[Document]:
"""Return docs most similar to query. """Async return docs most similar to query.
Args: Args:
query: Input text. query: Input text.
k: Number of Documents to return. Defaults to 4. k: Number of Documents to return. Defaults to 4.
**kwargs: Arguments to pass to the search method.
Returns: Returns:
List of Documents most similar to the query. List of Documents most similar to the query.
@ -821,6 +873,7 @@ class VectorStore(ABC):
Args: Args:
embedding: Embedding to look up documents similar to. embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4. k: Number of Documents to return. Defaults to 4.
**kwargs: Arguments to pass to the search method.
Returns: Returns:
List of Documents most similar to the query vector. List of Documents most similar to the query vector.
@ -830,11 +883,12 @@ class VectorStore(ABC):
async def asimilarity_search_by_vector( async def asimilarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]: ) -> List[Document]:
"""Return docs most similar to embedding vector. """Async return docs most similar to embedding vector.
Args: Args:
embedding: Embedding to look up documents similar to. embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4. k: Number of Documents to return. Defaults to 4.
**kwargs: Arguments to pass to the search method.
Returns: Returns:
List of Documents most similar to the query vector. List of Documents most similar to the query vector.
@ -864,10 +918,13 @@ class VectorStore(ABC):
query: Text to look up documents similar to. query: Text to look up documents similar to.
k: Number of Documents to return. Defaults to 4. k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm. fetch_k: Number of Documents to fetch to pass to MMR algorithm.
Default is 20.
lambda_mult: Number between 0 and 1 that determines the degree lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity. to maximum diversity and 1 to minimum diversity.
Defaults to 0.5. Defaults to 0.5.
**kwargs: Arguments to pass to the search method.
Returns: Returns:
List of Documents selected by maximal marginal relevance. List of Documents selected by maximal marginal relevance.
""" """
@ -881,7 +938,7 @@ class VectorStore(ABC):
lambda_mult: float = 0.5, lambda_mult: float = 0.5,
**kwargs: Any, **kwargs: Any,
) -> List[Document]: ) -> List[Document]:
"""Return docs selected using the maximal marginal relevance. """Async return docs selected using the maximal marginal relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents. among selected documents.
@ -890,10 +947,12 @@ class VectorStore(ABC):
query: Text to look up documents similar to. query: Text to look up documents similar to.
k: Number of Documents to return. Defaults to 4. k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm. fetch_k: Number of Documents to fetch to pass to MMR algorithm.
Default is 20.
lambda_mult: Number between 0 and 1 that determines the degree lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity. to maximum diversity and 1 to minimum diversity.
Defaults to 0.5. Defaults to 0.5.
Returns: Returns:
List of Documents selected by maximal marginal relevance. List of Documents selected by maximal marginal relevance.
""" """
@ -928,10 +987,13 @@ class VectorStore(ABC):
embedding: Embedding to look up documents similar to. embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4. k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm. fetch_k: Number of Documents to fetch to pass to MMR algorithm.
Default is 20.
lambda_mult: Number between 0 and 1 that determines the degree lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity. to maximum diversity and 1 to minimum diversity.
Defaults to 0.5. Defaults to 0.5.
**kwargs: Arguments to pass to the search method.
Returns: Returns:
List of Documents selected by maximal marginal relevance. List of Documents selected by maximal marginal relevance.
""" """
@ -945,7 +1007,7 @@ class VectorStore(ABC):
lambda_mult: float = 0.5, lambda_mult: float = 0.5,
**kwargs: Any, **kwargs: Any,
) -> List[Document]: ) -> List[Document]:
"""Return docs selected using the maximal marginal relevance. """Async return docs selected using the maximal marginal relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents. among selected documents.
@ -954,10 +1016,13 @@ class VectorStore(ABC):
embedding: Embedding to look up documents similar to. embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4. k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm. fetch_k: Number of Documents to fetch to pass to MMR algorithm.
Default is 20.
lambda_mult: Number between 0 and 1 that determines the degree lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity. to maximum diversity and 1 to minimum diversity.
Defaults to 0.5. Defaults to 0.5.
**kwargs: Arguments to pass to the search method.
Returns: Returns:
List of Documents selected by maximal marginal relevance. List of Documents selected by maximal marginal relevance.
""" """
@ -983,6 +1048,10 @@ class VectorStore(ABC):
Args: Args:
documents: List of Documents to add to the vectorstore. documents: List of Documents to add to the vectorstore.
embedding: Embedding function to use. embedding: Embedding function to use.
**kwargs: Additional keyword arguments.
Returns:
VectorStore: VectorStore initialized from documents and embeddings.
""" """
texts = [d.page_content for d in documents] texts = [d.page_content for d in documents]
metadatas = [d.metadata for d in documents] metadatas = [d.metadata for d in documents]
@ -995,11 +1064,15 @@ class VectorStore(ABC):
embedding: Embeddings, embedding: Embeddings,
**kwargs: Any, **kwargs: Any,
) -> VST: ) -> VST:
"""Return VectorStore initialized from documents and embeddings. """Async return VectorStore initialized from documents and embeddings.
Args: Args:
documents: List of Documents to add to the vectorstore. documents: List of Documents to add to the vectorstore.
embedding: Embedding function to use. embedding: Embedding function to use.
**kwargs: Additional keyword arguments.
Returns:
VectorStore: VectorStore initialized from documents and embeddings.
""" """
texts = [d.page_content for d in documents] texts = [d.page_content for d in documents]
metadatas = [d.metadata for d in documents] metadatas = [d.metadata for d in documents]
@ -1018,8 +1091,13 @@ class VectorStore(ABC):
Args: Args:
texts: Texts to add to the vectorstore. texts: Texts to add to the vectorstore.
metadatas: Optional list of metadatas associated with the texts.
embedding: Embedding function to use. embedding: Embedding function to use.
metadatas: Optional list of metadatas associated with the texts.
Default is None.
**kwargs: Additional keyword arguments.
Returns:
VectorStore: VectorStore initialized from texts and embeddings.
""" """
@classmethod @classmethod
@ -1030,12 +1108,17 @@ class VectorStore(ABC):
metadatas: Optional[List[dict]] = None, metadatas: Optional[List[dict]] = None,
**kwargs: Any, **kwargs: Any,
) -> VST: ) -> VST:
"""Return VectorStore initialized from texts and embeddings. """Async return VectorStore initialized from texts and embeddings.
Args: Args:
texts: Texts to add to the vectorstore. texts: Texts to add to the vectorstore.
metadatas: Optional list of metadatas associated with the texts.
embedding: Embedding function to use. embedding: Embedding function to use.
metadatas: Optional list of metadatas associated with the texts.
Default is None.
**kwargs: Additional keyword arguments.
Returns:
VectorStore: VectorStore initialized from texts and embeddings.
""" """
return await run_in_executor( return await run_in_executor(
None, cls.from_texts, texts, embedding, metadatas, **kwargs None, cls.from_texts, texts, embedding, metadatas, **kwargs
@ -1052,6 +1135,8 @@ class VectorStore(ABC):
"""Return VectorStoreRetriever initialized from this VectorStore. """Return VectorStoreRetriever initialized from this VectorStore.
Args: Args:
**kwargs: Keyword arguments to pass to the search function.
Can include:
search_type (Optional[str]): Defines the type of search that search_type (Optional[str]): Defines the type of search that
the Retriever should perform. the Retriever should perform.
Can be "similarity" (default), "mmr", or Can be "similarity" (default), "mmr", or
@ -1061,7 +1146,8 @@ class VectorStore(ABC):
k: Amount of documents to return (Default: 4) k: Amount of documents to return (Default: 4)
score_threshold: Minimum relevance threshold score_threshold: Minimum relevance threshold
for similarity_score_threshold for similarity_score_threshold
fetch_k: Amount of documents to pass to MMR algorithm (Default: 20) fetch_k: Amount of documents to pass to MMR algorithm
(Default: 20)
lambda_mult: Diversity of results returned by MMR; lambda_mult: Diversity of results returned by MMR;
1 for minimum diversity and 0 for maximum. (Default: 0.5) 1 for minimum diversity and 0 for maximum. (Default: 0.5)
filter: Filter by document metadata filter: Filter by document metadata
@ -1128,7 +1214,18 @@ class VectorStoreRetriever(BaseRetriever):
@root_validator(pre=True) @root_validator(pre=True)
def validate_search_type(cls, values: Dict) -> Dict: def validate_search_type(cls, values: Dict) -> Dict:
"""Validate search type.""" """Validate search type.
Args:
values: Values to validate.
Returns:
Values: Validated values.
Raises:
ValueError: If search_type is not one of the allowed search types.
ValueError: If score_threshold is not specified with a float value (0~1).
"""
search_type = values.get("search_type", "similarity") search_type = values.get("search_type", "similarity")
if search_type not in cls.allowed_search_types: if search_type not in cls.allowed_search_types:
raise ValueError( raise ValueError(
@ -1191,6 +1288,7 @@ class VectorStoreRetriever(BaseRetriever):
Args: Args:
documents: Documents to add to the vectorstore. documents: Documents to add to the vectorstore.
**kwargs: Other keyword arguments that subclasses might use.
Returns: Returns:
List of IDs of the added texts. List of IDs of the added texts.
@ -1200,10 +1298,11 @@ class VectorStoreRetriever(BaseRetriever):
async def aadd_documents( async def aadd_documents(
self, documents: List[Document], **kwargs: Any self, documents: List[Document], **kwargs: Any
) -> List[str]: ) -> List[str]:
"""Add documents to the vectorstore. """Async add documents to the vectorstore.
Args: Args:
documents: Documents to add to the vectorstore. documents: Documents to add to the vectorstore.
**kwargs: Other keyword arguments that subclasses might use.
Returns: Returns:
List of IDs of the added texts. List of IDs of the added texts.

View File

@ -32,13 +32,14 @@ class InMemoryVectorStore(VectorStore):
"""In-memory implementation of VectorStore using a dictionary. """In-memory implementation of VectorStore using a dictionary.
Uses numpy to compute cosine similarity for search. Uses numpy to compute cosine similarity for search.
"""
def __init__(self, embedding: Embeddings) -> None:
"""Initialize with the given embedding function.
Args: Args:
embedding: embedding function to use. embedding: embedding function to use.
""" """
def __init__(self, embedding: Embeddings) -> None:
"""Initialize with the given embedding function."""
# TODO: would be nice to change to # TODO: would be nice to change to
# Dict[str, Document] at some point (will be a breaking change) # Dict[str, Document] at some point (will be a breaking change)
self.store: Dict[str, Dict[str, Any]] = {} self.store: Dict[str, Dict[str, Any]] = {}
@ -74,7 +75,14 @@ class InMemoryVectorStore(VectorStore):
} }
def get_by_ids(self, ids: Sequence[str], /) -> List[Document]: def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
"""Get documents by their ids.""" """Get documents by their ids.
Args:
ids: The ids of the documents to get.
Returns:
A list of Document objects.
"""
documents = [] documents = []
for doc_id in ids: for doc_id in ids:
@ -90,6 +98,14 @@ class InMemoryVectorStore(VectorStore):
return documents return documents
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]: async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
"""Async get documents by their ids.
Args:
ids: The ids of the documents to get.
Returns:
A list of Document objects.
"""
return self.get_by_ids(ids) return self.get_by_ids(ids)
async def aadd_texts( async def aadd_texts(
@ -261,6 +277,16 @@ class InMemoryVectorStore(VectorStore):
def load( def load(
cls, path: str, embedding: Embeddings, **kwargs: Any cls, path: str, embedding: Embeddings, **kwargs: Any
) -> "InMemoryVectorStore": ) -> "InMemoryVectorStore":
"""Load a vector store from a file.
Args:
path: The path to load the vector store from.
embedding: The embedding to use.
**kwargs: Additional arguments to pass to the constructor.
Returns:
A VectorStore object.
"""
_path: Path = Path(path) _path: Path = Path(path)
with _path.open("r") as f: with _path.open("r") as f:
store = load(json.load(f)) store = load(json.load(f))
@ -269,6 +295,11 @@ class InMemoryVectorStore(VectorStore):
return vectorstore return vectorstore
def dump(self, path: str) -> None: def dump(self, path: str) -> None:
"""Dump the vector store to a file.
Args:
path: The path to dump the vector store to.
"""
_path: Path = Path(path) _path: Path = Path(path)
_path.parent.mkdir(exist_ok=True, parents=True) _path.parent.mkdir(exist_ok=True, parents=True)
with _path.open("w") as f: with _path.open("w") as f:

View File

@ -1,6 +1,6 @@
"""Internal utilities for the in memory implementation of VectorStore. """Internal utilities for the in memory implementation of VectorStore.
These are part of a private API and users should not used them directly These are part of a private API, and users should not use them directly
as they can change without notice. as they can change without notice.
""" """
@ -18,7 +18,20 @@ logger = logging.getLogger(__name__)
def _cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: def _cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
"""Row-wise cosine similarity between two equal-width matrices.""" """Row-wise cosine similarity between two equal-width matrices.
Args:
X: A matrix of shape (n, m).
Y: A matrix of shape (k, m).
Returns:
A matrix of shape (n, k) where each element (i, j) is the cosine similarity
between the ith row of X and the jth row of Y.
Raises:
ValueError: If the number of columns in X and Y are not the same.
ImportError: If numpy is not installed.
"""
try: try:
import numpy as np import numpy as np
except ImportError: except ImportError:
@ -64,7 +77,20 @@ def _maximal_marginal_relevance(
lambda_mult: float = 0.5, lambda_mult: float = 0.5,
k: int = 4, k: int = 4,
) -> List[int]: ) -> List[int]:
"""Calculate maximal marginal relevance.""" """Calculate maximal marginal relevance.
Args:
query_embedding: The query embedding.
embedding_list: A list of embeddings.
lambda_mult: The lambda parameter for MMR. Default is 0.5.
k: The number of embeddings to return. Default is 4.
Returns:
A list of indices of the embeddings to return.
Raises:
ImportError: If numpy is not installed.
"""
try: try:
import numpy as np import numpy as np
except ImportError: except ImportError: