community: VectorStores: Azure Cosmos DB Mongo vCore with DiskANN (#27329)

# Description
Add a new vector index type `diskann` to the Azure Cosmos DB Mongo vCore
vector store. Index creation accepts the DiskANN-specific parameters
`max_degree` and `l_build`, and search accepts `lSearch`. The DiskANN paper
is available here: [DiskANN: Fast Accurate Billion-point Nearest Neighbor
Search on a Single
Node](https://proceedings.neurips.cc/paper_files/paper/2019/file/09853c7fb1d3f8ee67a61b6bf4a7f8e6-Paper.pdf).

## Sample Usage
```python
import os

from pymongo import MongoClient

from langchain_core.documents import Document
from langchain_community.embeddings import AzureOpenAIEmbeddings
from langchain_community.vectorstores.azure_cosmos_db import (
    AzureCosmosDBVectorSearch,
    CosmosDBSimilarityType,
    CosmosDBVectorSearchType,
)

INDEX_NAME = "izzy-test-index-2"
NAMESPACE = "izzy_test_db.izzy_test_collection"
DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
CONNECTION_STRING: str = os.environ.get("MONGODB_VCORE_URI", "")

client: MongoClient = MongoClient(CONNECTION_STRING)
collection = client[DB_NAME][COLLECTION_NAME]

model_deployment = os.getenv(
    "OPENAI_EMBEDDINGS_DEPLOYMENT", "smart-agent-embedding-ada"
)
model_name = os.getenv("OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002")
openai_embeddings = AzureOpenAIEmbeddings(model=model_name, chunk_size=1)

# Documents to index; replace with your own data.
docs = [
    Document(page_content="Dogs are tough.", metadata={"a": 1}),
    Document(page_content="What is a sandwich?", metadata={"c": 1}),
]

vectorstore = AzureCosmosDBVectorSearch.from_documents(
    docs,
    openai_embeddings,
    collection=collection,
    index_name=INDEX_NAME,
)

# Read more about these variables here:
# https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search
maxDegree = 40  # maximum number of edges per node in the DiskANN graph
dimensions = 1536  # embedding dimensionality (1536 for text-embedding-ada-002)
similarity_algorithm = CosmosDBSimilarityType.COS
kind = CosmosDBVectorSearchType.VECTOR_DISKANN
lBuild = 20  # size of the candidate list used while building the index

vectorstore.create_index(
    dimensions=dimensions,
    similarity=similarity_algorithm,
    kind=kind,
    max_degree=maxDegree,
    l_build=lBuild,
)
```
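
Once the DiskANN index is built, the DiskANN-specific `lSearch` parameter can be passed through the search methods. A minimal follow-up sketch, adapted from this PR's integration tests; the query text and the `lSearch` value here are illustrative:

```python
# Continuing from the snippet above.
# lSearch is the size of the candidate list scanned at query time;
# larger values improve recall at the cost of latency.
lSearch = 40

results = vectorstore.similarity_search(
    "What is a sandwich?",
    k=1,
    kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
    lSearch=lSearch,
)
print(results[0].page_content)

# Max marginal relevance search accepts the same DiskANN parameters.
mmr_results = vectorstore.max_marginal_relevance_search(
    "What is a sandwich?",
    k=2,
    kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
    lambda_mult=0.1,
    lSearch=lSearch,
    with_embedding=True,
)
```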

## Dependencies
No additional dependencies were added.

---------

Co-authored-by: Yang Qiao (from Dev Box) <yangqiao@microsoft.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
Author: fatmelon
Date: 2024-12-12 09:54:04 +08:00
Committed by: GitHub
Commit: d1e0ec7b55 (parent: ba9b95cd23)
3 changed files with 703 additions and 59 deletions


@@ -8,7 +8,7 @@ from typing import Any, Generator, Optional, Union
import pytest
from langchain_core.documents import Document
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import AzureOpenAIEmbeddings
from langchain_community.vectorstores.azure_cosmos_db import (
AzureCosmosDBVectorSearch,
CosmosDBSimilarityType,
@@ -24,6 +24,7 @@ model_name = os.getenv("OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002")
INDEX_NAME = "langchain-test-index"
INDEX_NAME_VECTOR_HNSW = "langchain-test-index-hnsw"
INDEX_NAME_VECTOR_DISKANN = "langchain-test-index-diskann"
NAMESPACE = "langchain_test_db.langchain_test_collection"
CONNECTION_STRING: str = os.environ.get("MONGODB_VCORE_URI", "")
DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
@@ -36,6 +37,9 @@ m = 16
ef_construction = 64
ef_search = 40
score_threshold = 0.1
maxDegree = 50
lBuild = 40
lSearch = 100
application_name = "LANGCHAIN_PYTHON"
@@ -53,8 +57,9 @@ def collection() -> Any:
@pytest.fixture()
def azure_openai_embeddings() -> Any:
openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
deployment=model_deployment, model=model_name, chunk_size=1
openai_embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
model=model_name,
chunk_size=1,
)
return openai_embeddings
@@ -70,8 +75,12 @@ pytest tests/integration_tests/vectorstores/test_azure_cosmos_db.py
class TestAzureCosmosDBVectorSearch:
@classmethod
def setup_class(cls) -> None:
if not os.getenv("OPENAI_API_KEY"):
raise ValueError("OPENAI_API_KEY environment variable is not set")
if not os.getenv("AZURE_OPENAI_API_KEY"):
raise ValueError("AZURE_OPENAI_API_KEY environment variable is not set")
if not os.getenv("AZURE_OPENAI_ENDPOINT"):
raise ValueError("AZURE_OPENAI_ENDPOINT environment variable is not set")
if not os.getenv("AZURE_OPENAI_API_VERSION"):
raise ValueError("AZURE_OPENAI_API_VERSION environment variable is not set")
# ensure the test collection is empty
collection = prepare_collection()
@@ -95,7 +104,7 @@ class TestAzureCosmosDBVectorSearch:
return "805.555.1212"
def test_from_documents_cosine_distance(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
"""Test end to end construction and search."""
documents = [
@@ -135,7 +144,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_documents_inner_product(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
"""Test end to end construction and search."""
documents = [
@@ -174,7 +183,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_cosine_distance(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -208,7 +217,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_with_metadatas_cosine_distance(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -246,7 +255,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_with_metadatas_delete_one(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -280,7 +289,6 @@ class TestAzureCosmosDBVectorSearch:
assert output
assert output[0].page_content == "What is a sandwich?"
assert output[0].metadata["c"] == 1
first_document_id_object = output[0].metadata["_id"]
first_document_id = str(first_document_id_object)
@@ -300,7 +308,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_with_metadatas_delete_multiple(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -359,7 +367,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_with_metadatas_inner_product(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -397,7 +405,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_with_metadatas_euclidean_distance(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -435,7 +443,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_max_marginal_relevance_cosine_distance(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = ["foo", "foo", "fou", "foy"]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
@@ -453,7 +461,12 @@ class TestAzureCosmosDBVectorSearch:
query = "foo"
output = vectorstore.max_marginal_relevance_search(
query, k=10, kind=kind, lambda_mult=0.1, score_threshold=score_threshold
query,
k=10,
kind=kind,
lambda_mult=0.1,
score_threshold=score_threshold,
with_embedding=True,
)
assert len(output) == len(texts)
@@ -463,7 +476,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_max_marginal_relevance_inner_product(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = ["foo", "foo", "fou", "foy"]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
@@ -481,7 +494,12 @@ class TestAzureCosmosDBVectorSearch:
query = "foo"
output = vectorstore.max_marginal_relevance_search(
query, k=10, kind=kind, lambda_mult=0.1, score_threshold=score_threshold
query,
k=10,
kind=kind,
lambda_mult=0.1,
score_threshold=score_threshold,
with_embedding=True,
)
assert len(output) == len(texts)
@@ -495,7 +513,7 @@ class TestAzureCosmosDBVectorSearch:
"""
def test_from_documents_cosine_distance_vector_hnsw(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
"""Test end to end construction and search."""
documents = [
@@ -539,7 +557,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_documents_inner_product_vector_hnsw(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
"""Test end to end construction and search."""
documents = [
@@ -583,7 +601,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_cosine_distance_vector_hnsw(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -622,7 +640,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_with_metadatas_cosine_distance_vector_hnsw(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -665,7 +683,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_with_metadatas_delete_one_vector_hnsw(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -724,7 +742,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_with_metadatas_delete_multiple_vector_hnsw(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -788,7 +806,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_from_texts_with_metadatas_inner_product_vector_hnsw(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
@@ -831,7 +849,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_max_marginal_relevance_cosine_distance_vector_hnsw(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = ["foo", "foo", "fou", "foy"]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
@@ -859,6 +877,7 @@ class TestAzureCosmosDBVectorSearch:
kind=CosmosDBVectorSearchType.VECTOR_HNSW,
lambda_mult=0.1,
score_threshold=score_threshold,
with_embedding=True,
)
assert len(output) == len(texts)
@@ -868,7 +887,7 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_index()
def test_max_marginal_relevance_inner_product_vector_hnsw(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = ["foo", "foo", "fou", "foy"]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
@@ -896,6 +915,405 @@ class TestAzureCosmosDBVectorSearch:
kind=CosmosDBVectorSearchType.VECTOR_HNSW,
lambda_mult=0.1,
score_threshold=score_threshold,
with_embedding=True,
)
assert len(output) == len(texts)
assert output[0].page_content == "foo"
assert output[1].page_content != "foo"
vectorstore.delete_index()
"""
Test cases for the similarity algorithm using vector-diskann
"""
def test_from_documents_cosine_distance_vector_diskann(
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
"""Test end to end construction and search."""
documents = [
Document(page_content="Dogs are tough.", metadata={"a": 1}),
Document(page_content="Cats have fluff.", metadata={"b": 1}),
Document(page_content="What is a sandwich?", metadata={"c": 1}),
Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
]
vectorstore = AzureCosmosDBVectorSearch.from_documents(
documents,
azure_openai_embeddings,
collection=collection,
index_name=INDEX_NAME_VECTOR_DISKANN,
)
sleep(1) # waits for Cosmos DB to save contents to the collection
# Create the DiskANN index that will be leveraged later for vector search
vectorstore.create_index(
dimensions=dimensions,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
max_degree=maxDegree,
l_build=lBuild,
)
sleep(2) # waits for the index to be set up
output = vectorstore.similarity_search(
"Sandwich",
k=1,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lSearch=lSearch,
)
assert output
assert output[0].page_content == "What is a sandwich?"
assert output[0].metadata["c"] == 1
vectorstore.delete_index()
def test_from_documents_inner_product_vector_diskann(
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
"""Test end to end construction and search."""
documents = [
Document(page_content="Dogs are tough.", metadata={"a": 1}),
Document(page_content="Cats have fluff.", metadata={"b": 1}),
Document(page_content="What is a sandwich?", metadata={"c": 1}),
Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
]
vectorstore = AzureCosmosDBVectorSearch.from_documents(
documents,
azure_openai_embeddings,
collection=collection,
index_name=INDEX_NAME_VECTOR_DISKANN,
)
sleep(1) # waits for Cosmos DB to save contents to the collection
# Create the DiskANN index that will be leveraged later for vector search
vectorstore.create_index(
dimensions=dimensions,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
max_degree=maxDegree,
l_build=lBuild,
)
sleep(2) # waits for the index to be set up
output = vectorstore.similarity_search(
"Sandwich",
k=1,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lSearch=lSearch,
)
assert output
assert output[0].page_content == "What is a sandwich?"
assert output[0].metadata["c"] == 1
vectorstore.delete_index()
def test_from_texts_cosine_distance_vector_diskann(
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
"Cats have fluff.",
"What is a sandwich?",
"That fence is purple.",
]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
texts,
azure_openai_embeddings,
collection=collection,
index_name=INDEX_NAME_VECTOR_DISKANN,
)
# Create the DiskANN index that will be leveraged later for vector search
vectorstore.create_index(
dimensions=dimensions,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
max_degree=maxDegree,
l_build=lBuild,
)
sleep(2) # waits for the index to be set up
output = vectorstore.similarity_search(
"Sandwich",
k=1,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lSearch=lSearch,
)
assert output[0].page_content == "What is a sandwich?"
vectorstore.delete_index()
def test_from_texts_with_metadatas_cosine_distance_vector_diskann(
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
"Cats have fluff.",
"What is a sandwich?",
"The fence is purple.",
]
metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
texts,
azure_openai_embeddings,
metadatas=metadatas,
collection=collection,
index_name=INDEX_NAME_VECTOR_DISKANN,
)
# Create the DiskANN index that will be leveraged later for vector search
vectorstore.create_index(
dimensions=dimensions,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
max_degree=maxDegree,
l_build=lBuild,
)
sleep(2) # waits for the index to be set up
output = vectorstore.similarity_search(
"Sandwich",
k=1,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lSearch=lSearch,
)
assert output
assert output[0].page_content == "What is a sandwich?"
assert output[0].metadata["c"] == 1
vectorstore.delete_index()
def test_from_texts_with_metadatas_delete_one_vector_diskann(
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
"Cats have fluff.",
"What is a sandwich?",
"The fence is purple.",
]
metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
texts,
azure_openai_embeddings,
metadatas=metadatas,
collection=collection,
index_name=INDEX_NAME_VECTOR_DISKANN,
)
# Create the DiskANN index that will be leveraged later for vector search
vectorstore.create_index(
dimensions=dimensions,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
max_degree=maxDegree,
l_build=lBuild,
)
sleep(2) # waits for the index to be set up
output = vectorstore.similarity_search(
"Sandwich",
k=1,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lSearch=lSearch,
)
assert output
assert output[0].page_content == "What is a sandwich?"
assert output[0].metadata["c"] == 1
first_document_id_object = output[0].metadata["_id"]
first_document_id = str(first_document_id_object)
vectorstore.delete_document_by_id(first_document_id)
sleep(2) # waits for the index to be updated
output2 = vectorstore.similarity_search(
"Sandwich",
k=1,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lSearch=lSearch,
)
assert output2
assert output2[0].page_content != "What is a sandwich?"
vectorstore.delete_index()
def test_from_texts_with_metadatas_delete_multiple_vector_diskann(
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
"Cats have fluff.",
"What is a sandwich?",
"The fence is purple.",
]
metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
texts,
azure_openai_embeddings,
metadatas=metadatas,
collection=collection,
index_name=INDEX_NAME_VECTOR_DISKANN,
)
# Create the DiskANN index that will be leveraged later for vector search
vectorstore.create_index(
dimensions=dimensions,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
max_degree=maxDegree,
l_build=lBuild,
)
sleep(2) # waits for the index to be set up
output = vectorstore.similarity_search(
"Sandwich",
k=5,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lSearch=lSearch,
)
first_document_id = str(output[0].metadata["_id"])
second_document_id = str(output[1].metadata["_id"])
third_document_id = str(output[2].metadata["_id"])
document_ids = [first_document_id, second_document_id, third_document_id]
vectorstore.delete(document_ids)
sleep(2) # waits for the index to be updated
output_2 = vectorstore.similarity_search(
"Sandwich",
k=5,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lSearch=lSearch,
)
assert output
assert output_2
assert len(output) == 4 # we should see all the four documents
assert (
len(output_2) == 1
) # we should see only one document left after three have been deleted
vectorstore.delete_index()
def test_from_texts_with_metadatas_inner_product_vector_diskann(
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = [
"Dogs are tough.",
"Cats have fluff.",
"What is a sandwich?",
"The fence is purple.",
]
metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
texts,
azure_openai_embeddings,
metadatas=metadatas,
collection=collection,
index_name=INDEX_NAME_VECTOR_DISKANN,
)
# Create the DiskANN index that will be leveraged later for vector search
vectorstore.create_index(
dimensions=dimensions,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
max_degree=maxDegree,
l_build=lBuild,
)
sleep(2) # waits for the index to be set up
output = vectorstore.similarity_search(
"Sandwich",
k=1,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lSearch=lSearch,
)
assert output
assert output[0].page_content == "What is a sandwich?"
assert output[0].metadata["c"] == 1
vectorstore.delete_index()
def test_max_marginal_relevance_cosine_distance_vector_diskann(
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = ["foo", "foo", "fou", "foy"]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
texts,
azure_openai_embeddings,
collection=collection,
index_name=INDEX_NAME_VECTOR_DISKANN,
)
# Create the DiskANN index that will be leveraged later for vector search
vectorstore.create_index(
dimensions=dimensions,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
max_degree=maxDegree,
l_build=lBuild,
)
sleep(2) # waits for the index to be set up
query = "foo"
output = vectorstore.max_marginal_relevance_search(
query,
k=10,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lambda_mult=0.1,
lSearch=lSearch,
with_embedding=True,
)
assert len(output) == len(texts)
assert output[0].page_content == "foo"
assert output[1].page_content != "foo"
vectorstore.delete_index()
def test_max_marginal_relevance_inner_product_vector_diskann(
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
texts = ["foo", "foo", "fou", "foy"]
vectorstore = AzureCosmosDBVectorSearch.from_texts(
texts,
azure_openai_embeddings,
collection=collection,
index_name=INDEX_NAME_VECTOR_DISKANN,
)
# Create the DiskANN index that will be leveraged later for vector search
vectorstore.create_index(
dimensions=dimensions,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
max_degree=maxDegree,
l_build=lBuild,
)
sleep(2) # waits for the index to be set up
query = "foo"
output = vectorstore.max_marginal_relevance_search(
query,
k=10,
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
lambda_mult=0.1,
lSearch=lSearch,
with_embedding=True,
)
assert len(output) == len(texts)
@@ -906,7 +1324,7 @@ class TestAzureCosmosDBVectorSearch:
@staticmethod
def invoke_delete_with_no_args(
azure_openai_embeddings: OpenAIEmbeddings, collection: Any
azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> Optional[bool]:
vectorstore: AzureCosmosDBVectorSearch = (
AzureCosmosDBVectorSearch.from_connection_string(
@@ -922,7 +1340,7 @@ class TestAzureCosmosDBVectorSearch:
@staticmethod
def invoke_delete_by_id_with_no_args(
azure_openai_embeddings: OpenAIEmbeddings, collection: Any
azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
vectorstore: AzureCosmosDBVectorSearch = (
AzureCosmosDBVectorSearch.from_connection_string(
@@ -937,14 +1355,14 @@ class TestAzureCosmosDBVectorSearch:
vectorstore.delete_document_by_id()
def test_invalid_arguments_to_delete(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
with pytest.raises(ValueError) as exception_info:
self.invoke_delete_with_no_args(azure_openai_embeddings, collection)
assert str(exception_info.value) == "No document ids provided to delete."
def test_no_arguments_to_delete_by_id(
self, azure_openai_embeddings: OpenAIEmbeddings, collection: Any
self, azure_openai_embeddings: AzureOpenAIEmbeddings, collection: Any
) -> None:
with pytest.raises(Exception) as exception_info:
self.invoke_delete_by_id_with_no_args(