Compare commits

...

2 Commits

Author SHA1 Message Date
mcantillon21
1bceb054bd lint fixes 2023-08-28 12:26:07 -07:00
mcantillon21
2dd2bdede3 adding batching 2023-08-28 12:16:53 -07:00

View File

@@ -88,35 +88,47 @@ class VectorStore(ABC):
) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore."""
raise NotImplementedError
def add_documents(
    self, documents: List[Document], batch_size: int = 100, **kwargs: Any
) -> List[str]:
    """Run more documents through the embeddings and add to the vectorstore
    in batches.

    The documents are split into consecutive slices of at most
    ``batch_size`` items, and ``add_texts`` is called once per slice with
    that slice's page contents and metadata.

    Args:
        documents (List[Document]): Documents to add to the vectorstore.
        batch_size (int): Maximum number of documents passed to a single
            ``add_texts`` call. Must be at least 1.
        **kwargs (Any): Extra keyword arguments forwarded unchanged to
            every ``add_texts`` call.

    Returns:
        List[str]: The IDs returned by each ``add_texts`` call,
            concatenated in batch order. Empty if ``documents`` is empty
            (``add_texts`` is not called in that case).

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # A negative step would make range() empty and silently drop every
    # document; zero already raised ValueError inside range(). Reject both
    # up front with one explicit message.
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size}"
        )

    # TODO: Handle the case where the user doesn't provide ids on the Collection
    # NOTE(review): **kwargs is forwarded whole to every batch, so a
    # per-document keyword value (e.g. a list of ids, if the concrete
    # add_texts accepts one) is not sliced to match the batch -- confirm
    # against the implementations before relying on it.
    ids: List[str] = []
    for start in range(0, len(documents), batch_size):
        batch = documents[start : start + batch_size]
        texts = [doc.page_content for doc in batch]
        metadatas = [doc.metadata for doc in batch]
        ids.extend(self.add_texts(texts, metadatas, **kwargs))
    return ids
async def aadd_documents(
    self, documents: List[Document], batch_size: int = 100, **kwargs: Any
) -> List[str]:
    """Run more documents through the embeddings and add to the vectorstore
    in batches.

    The documents are split into consecutive slices of at most
    ``batch_size`` items, and ``aadd_texts`` is awaited once per slice with
    that slice's page contents and metadata. Batches are awaited one after
    another, not concurrently.

    Args:
        documents (List[Document]): Documents to add to the vectorstore.
        batch_size (int): Maximum number of documents passed to a single
            ``aadd_texts`` call. Must be at least 1.
        **kwargs (Any): Extra keyword arguments forwarded unchanged to
            every ``aadd_texts`` call.

    Returns:
        List[str]: The IDs returned by each ``aadd_texts`` call,
            concatenated in batch order. Empty if ``documents`` is empty
            (``aadd_texts`` is not awaited in that case).

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # A negative step would make range() empty and silently drop every
    # document; zero already raised ValueError inside range(). Reject both
    # up front with one explicit message.
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size}"
        )

    # NOTE(review): **kwargs is forwarded whole to every batch, so a
    # per-document keyword value (e.g. a list of ids, if the concrete
    # aadd_texts accepts one) is not sliced to match the batch -- confirm
    # against the implementations before relying on it.
    ids: List[str] = []
    for start in range(0, len(documents), batch_size):
        batch = documents[start : start + batch_size]
        texts = [doc.page_content for doc in batch]
        metadatas = [doc.metadata for doc in batch]
        ids.extend(await self.aadd_texts(texts, metadatas, **kwargs))
    return ids
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
"""Return docs most similar to query using specified search type."""