mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-02 19:47:13 +00:00
LangChain Community: VectorStores: Azure Cosmos DB Filtered Vector Search (#24087)
Thank you for contributing to LangChain! - This PR adds vector search filtering for Azure Cosmos DB Mongo vCore and NoSQL. - [ ] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! - [ ] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.
This commit is contained in:
@@ -25,7 +25,7 @@ model_name = os.getenv("OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002")
|
||||
# Names of the vector indexes created by the integration tests.
INDEX_NAME = "langchain-test-index"
INDEX_NAME_VECTOR_HNSW = "langchain-test-index-hnsw"
# "<database>.<collection>" namespace used by the tests.
NAMESPACE = "langchain_test_db.langchain_test_collection"
# SECURITY: the connection string must come from the environment only.
# Never commit a URI with an embedded username/password to source control;
# any credential that was previously hard-coded here must be rotated.
CONNECTION_STRING: str = os.environ.get("MONGODB_VCORE_URI", "")
DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")

num_lists = 3
|
||||
|
@@ -104,6 +104,7 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
),
|
||||
indexing_policy=get_vector_indexing_policy("flat"),
|
||||
cosmos_container_properties={"partition_key": partition_key},
|
||||
cosmos_database_properties={},
|
||||
)
|
||||
sleep(1) # waits for Cosmos DB to save contents to the collection
|
||||
|
||||
@@ -139,6 +140,7 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
),
|
||||
indexing_policy=get_vector_indexing_policy("flat"),
|
||||
cosmos_container_properties={"partition_key": partition_key},
|
||||
cosmos_database_properties={},
|
||||
)
|
||||
sleep(1) # waits for Cosmos DB to save contents to the collection
|
||||
|
||||
@@ -154,3 +156,60 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
assert output2
|
||||
assert output2[0].page_content != "Dogs are tough."
|
||||
safe_delete_database(cosmos_client)
|
||||
|
||||
def test_from_documents_cosine_distance_with_filtering(
    self,
    cosmos_client: Any,
    partition_key: Any,
    azure_openai_embeddings: OpenAIEmbeddings,
) -> None:
    """Test cosine-distance similarity search with and without a pre-filter.

    Builds a store from four documents, then checks three searches for
    "Dogs" with ``k=4``:

    1. no filter -> all four documents are returned;
    2. ``where_clause`` on ``c.metadata.a=1`` -> only the two documents
       whose metadata has ``a == 1`` are returned;
    3. the same ``where_clause`` plus ``limit_offset_clause`` of
       ``OFFSET 0 LIMIT 1`` -> a single document is returned.

    In every case the top hit is expected to be "Dogs are tough.".
    """
    documents = [
        Document(page_content="Dogs are tough.", metadata={"a": 1}),
        Document(page_content="Cats have fluff.", metadata={"a": 1}),
        Document(page_content="What is a sandwich?", metadata={"c": 1}),
        Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
    ]

    # Cleanup lives in ``finally`` so a failing assertion does not leave the
    # test database behind for subsequent runs.
    # NOTE(review): assumes safe_delete_database tolerates a database that
    # was never created (e.g. if from_documents fails early) -- confirm.
    try:
        store = AzureCosmosDBNoSqlVectorSearch.from_documents(
            documents,
            azure_openai_embeddings,
            cosmos_client=cosmos_client,
            database_name=database_name,
            container_name=container_name,
            vector_embedding_policy=get_vector_embedding_policy(
                "cosine", "float32", 400
            ),
            indexing_policy=get_vector_indexing_policy("flat"),
            cosmos_container_properties={"partition_key": partition_key},
            cosmos_database_properties={},
        )
        sleep(1)  # waits for Cosmos DB to save contents to the collection

        # 1. Unfiltered search returns every document.
        output = store.similarity_search("Dogs", k=4)
        assert len(output) == 4
        assert output[0].page_content == "Dogs are tough."
        assert output[0].metadata["a"] == 1

        # 2. WHERE clause narrows the candidates to the two "a == 1" documents.
        pre_filter = {
            "where_clause": "WHERE c.metadata.a=1",
        }
        output = store.similarity_search(
            "Dogs", k=4, pre_filter=pre_filter, with_embedding=True
        )

        assert len(output) == 2
        assert output[0].page_content == "Dogs are tough."
        assert output[0].metadata["a"] == 1

        # 3. Adding OFFSET/LIMIT caps the filtered result at one document.
        pre_filter = {
            "where_clause": "WHERE c.metadata.a=1",
            "limit_offset_clause": "OFFSET 0 LIMIT 1",
        }

        output = store.similarity_search("Dogs", k=4, pre_filter=pre_filter)

        assert len(output) == 1
        assert output[0].page_content == "Dogs are tough."
        assert output[0].metadata["a"] == 1
    finally:
        safe_delete_database(cosmos_client)
|
||||
|
Reference in New Issue
Block a user