AstraDB VectorStore: implement pre_delete_collection (#13780)

- **Description:** Some vector stores have a flag to try deleting the
collection before creating it (such as `PGVector`). This is a useful
flag when prototyping indexing pipelines and also for integration tests.
Added the bool flag `pre_delete_collection` to the constructor (default
`False`).
  - **Tag maintainer:** @hemidactylus 
  - **Twitter handle:** nicoloboschi

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
Nicolò Boschi 2023-12-03 21:06:20 +01:00 committed by GitHub
parent 2780d2d4dd
commit e204657b3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 79 additions and 36 deletions

View File

@ -78,7 +78,7 @@ class AstraDB(VectorStore):
vectorstore.add_texts(["Giraffes", "All good here"])
results = vectorstore.similarity_search("Everything's ok", k=1)
Constructor args (only keyword-arguments accepted):
Constructor Args (only keyword-arguments accepted):
embedding (Embeddings): embedding function to use.
collection_name (str): name of the Astra DB collection to create/use.
token (Optional[str]): API token for Astra DB usage.
@ -101,6 +101,9 @@ class AstraDB(VectorStore):
threads in a batch to insert pre-existing entries.
bulk_delete_concurrency (Optional[int]): Number of threads
(for deleting multiple rows concurrently).
pre_delete_collection (bool): whether to delete the collection
before creating it. If False (the default) and the collection already
exists, the collection will be used as is.
A note on concurrency: as a rule of thumb, on a typical client machine
it is suggested to keep the quantity
@ -138,6 +141,7 @@ class AstraDB(VectorStore):
bulk_insert_batch_concurrency: Optional[int] = None,
bulk_insert_overwrite_concurrency: Optional[int] = None,
bulk_delete_concurrency: Optional[int] = None,
pre_delete_collection: bool = False,
) -> None:
"""
Create an AstraDB vector store object. See class docstring for help.
@ -154,6 +158,7 @@ class AstraDB(VectorStore):
"Could not import a recent astrapy python package. "
"Please install it with `pip install --upgrade astrapy`."
)
# Conflicting-arg checks:
if astra_db_client is not None:
if token is not None or api_endpoint is not None:
@ -191,7 +196,10 @@ class AstraDB(VectorStore):
api_endpoint=self.api_endpoint,
namespace=self.namespace,
)
if not pre_delete_collection:
self._provision_collection()
else:
self.clear()
self.collection = LibAstraDBCollection(
collection_name=self.collection_name,

View File

@ -148,6 +148,41 @@ class TestAstraDB:
)
v_store_2.delete_collection()
def test_astradb_vectorstore_pre_delete_collection(self) -> None:
    """Check that ``pre_delete_collection=True`` yields an empty store.

    One entry is written through a first store; a second store is then
    built on the same collection name with ``pre_delete_collection=True``
    and a search through it is expected to return no results.
    """
    embedding = SomeEmbeddings(dimension=2)
    # Connection secrets come from the environment and are shared by
    # both store instances created below.
    collection_name = "lc_test_pre_del"
    token = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
    api_endpoint = os.environ["ASTRA_DB_API_ENDPOINT"]
    namespace = os.environ.get("ASTRA_DB_KEYSPACE")

    v_store = AstraDB(
        embedding=embedding,
        collection_name=collection_name,
        token=token,
        api_endpoint=api_endpoint,
        namespace=namespace,
    )
    try:
        v_store.add_texts(
            texts=["aa"],
            metadatas=[{"k": "a", "ord": 0}],
            ids=["a"],
        )
        hits_before = v_store.similarity_search("aa", k=5)
        assert len(hits_before) == 1

        # Rebind the same name so the ``finally`` clause always drops
        # whichever store instance was created last.
        v_store = AstraDB(
            embedding=embedding,
            pre_delete_collection=True,
            collection_name=collection_name,
            token=token,
            api_endpoint=api_endpoint,
            namespace=namespace,
        )
        hits_after = v_store.similarity_search("aa", k=5)
        assert len(hits_after) == 0
    finally:
        v_store.delete_collection()
def test_astradb_vectorstore_from_x(self) -> None:
"""from_texts and from_documents methods."""
emb = SomeEmbeddings(dimension=2)