Compare commits

...

2 Commits

Author SHA1 Message Date
mcantillon21
1bceb054bd lint fixes 2023-08-28 12:26:07 -07:00
mcantillon21
2dd2bdede3 adding batching 2023-08-28 12:16:53 -07:00

View File

@@ -88,35 +88,47 @@ class VectorStore(ABC):
) -> List[str]: ) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore.""" """Run more texts through the embeddings and add to the vectorstore."""
raise NotImplementedError raise NotImplementedError
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: def add_documents(self, documents: List[Document], batch_size: int = 100, **kwargs: Any
"""Run more documents through the embeddings and add to the vectorstore. ) -> List[str]:
"""Run more documents through the embeddings and add to the vectorstore
in batches.
Args: Args:
documents (List[Document]): Documents to add to the vectorstore. documents (List[Document]): Documents to add to the vectorstore.
batch_size (int): Size of the batch to add to the vectorstore.
Returns: Returns:
List[str]: List of IDs of the added texts. List[str]: List of IDs of the added texts.
""" """
# TODO: Handle the case where the user doesn't provide ids on the Collection # TODO: Handle the case where the user doesn't provide ids on the Collection
texts = [doc.page_content for doc in documents] ids = []
metadatas = [doc.metadata for doc in documents] for i in range(0, len(documents), batch_size):
return self.add_texts(texts, metadatas, **kwargs) batch = documents[i:i+batch_size]
texts = [doc.page_content for doc in batch]
metadatas = [doc.metadata for doc in batch]
ids.extend(self.add_texts(texts, metadatas, **kwargs))
return ids
async def aadd_documents( async def aadd_documents(
self, documents: List[Document], **kwargs: Any self, documents: List[Document], batch_size: int = 100, **kwargs: Any
) -> List[str]: ) -> List[str]:
"""Run more documents through the embeddings and add to the vectorstore. """Run more documents through the embeddings and add to the vectorstore
in batches.
Args: Args:
documents (List[Document]): Documents to add to the vectorstore. documents (List[Document]): Documents to add to the vectorstore.
Returns: Returns:
List[str]: List of IDs of the added texts. List[str]: List of IDs of the added texts.
""" """
texts = [doc.page_content for doc in documents] ids = []
metadatas = [doc.metadata for doc in documents] for i in range(0, len(documents), batch_size):
return await self.aadd_texts(texts, metadatas, **kwargs) batch = documents[i:i+batch_size]
texts = [doc.page_content for doc in batch]
metadatas = [doc.metadata for doc in batch]
ids.extend(await self.aadd_texts(texts, metadatas, **kwargs))
return ids
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]: def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
"""Return docs most similar to query using specified search type.""" """Return docs most similar to query using specified search type."""