community[minor]: Adding Azure Cosmos Mongo vCore Vector DB Cache (#16856)

Description: This pull request introduces several enhancements for Azure Cosmos Vector DB, primarily focused on improving caching and search capabilities using Azure Cosmos MongoDB vCore Vector DB. Here's a summary of the changes: - **AzureCosmosDBSemanticCache**: Added a new cache implementation called AzureCosmosDBSemanticCache, which utilizes Azure Cosmos MongoDB vCore Vector DB for efficient caching of semantic data. Added comprehensive test cases for AzureCosmosDBSemanticCache to ensure its correctness and robustness. These tests cover various scenarios and edge cases to validate the cache's behavior. - **HNSW Vector Search**: Added HNSW vector search functionality in the CosmosDB Vector Search module. This enhancement enables more efficient and accurate vector searches by utilizing the HNSW (Hierarchical Navigable Small World) algorithm. Added corresponding test cases to validate the HNSW vector search functionality in both AzureCosmosDBSemanticCache and AzureCosmosDBVectorSearch. These tests ensure the correctness and performance of the HNSW search algorithm. - **LLM Caching Notebook**: The notebook now includes a comprehensive example showcasing the usage of the AzureCosmosDBSemanticCache. This example highlights how the cache can be employed to efficiently store and retrieve semantic data. Additionally, the example provides default values for all parameters used within the AzureCosmosDBSemanticCache, ensuring clarity and ease of understanding for users who are new to the cache implementation. @hwchase17, @baskaryan, @eyurtsev
2025-09-23 19:39:58 +00:00 · 2024-03-03 14:04:15 -08:00
parent db47b5deee
commit 7c2f3f6f95
6 changed files with 1507 additions and 126 deletions
--- a/libs/langchain/tests/integration_tests/cache/test_azure_cosmosdb_cache.py
+++ b/libs/langchain/tests/integration_tests/cache/test_azure_cosmosdb_cache.py
@@ -0,0 +1,350 @@
+"""Test Azure CosmosDB cache functionality.
+
+Required to run this test:
+    - a recent 'pymongo' Python package available
+    - an Azure CosmosDB Mongo vCore instance
+    - one environment variable set:
+        export MONGODB_VCORE_URI="connection string for azure cosmos db mongo vCore"
+"""
+import os
+import uuid
+
+import pytest
+from langchain_community.cache import AzureCosmosDBSemanticCache
+from langchain_community.vectorstores.azure_cosmos_db import (
+    CosmosDBSimilarityType,
+    CosmosDBVectorSearchType,
+)
+from langchain_core.outputs import Generation
+
+from langchain.globals import get_llm_cache, set_llm_cache
+from tests.integration_tests.cache.fake_embeddings import (
+    FakeEmbeddings,
+)
+from tests.unit_tests.llms.fake_llm import FakeLLM
+
+INDEX_NAME = "langchain-test-index"
+# "<database>.<collection>" namespace; split into its two parts below.
+NAMESPACE = "langchain_test_db.langchain_test_collection"
+# Falls back to an empty string when MONGODB_VCORE_URI is not set.
+CONNECTION_STRING: str = os.getenv("MONGODB_VCORE_URI", "")
+DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
+
+# Default settings passed to AzureCosmosDBSemanticCache by the tests below.
+# NOTE(review): num_lists presumably applies to IVF indexes, while m,
+# ef_construction and ef_search presumably apply to HNSW indexes -- confirm
+# against the Azure Cosmos DB vector search documentation.
+num_lists = 3
+dimensions = 10
+similarity_algorithm = CosmosDBSimilarityType.COS
+kind = CosmosDBVectorSearchType.VECTOR_IVF
+m = 16
+ef_construction = 64
+ef_search = 40
+score_threshold = 0.1
+
+
+def _has_env_vars() -> bool:
+    """Return True when the MONGODB_VCORE_URI environment variable is set."""
+    required_var = "MONGODB_VCORE_URI"
+    return required_var in os.environ
+
+
+def random_string() -> str:
+    """Return a newly generated random UUID (version 4) as a string."""
+    fresh_id = uuid.uuid4()
+    return str(fresh_id)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache() -> None:
+    """Round-trip one generation through the cache using the module defaults.
+
+    Stores a single generation under the prompt "foo" and expects a lookup
+    with the prompt "bar" to return it. The cache is cleared afterwards
+    whether or not the assertion passes.
+    """
+    set_llm_cache(
+        AzureCosmosDBSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            cosmosdb_client=None,
+            embedding=FakeEmbeddings(),
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=similarity_algorithm,
+            kind=kind,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+        )
+    )
+
+    llm = FakeLLM()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted(params.items()))
+
+    try:
+        get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+
+        # foo and bar will have the same embedding produced by FakeEmbeddings
+        cache_output = get_llm_cache().lookup("bar", llm_string)
+        assert cache_output == [Generation(text="fizz")]
+    finally:
+        # Clear the cache even on failure so a stale entry cannot leak into
+        # the other tests, which all share the same database and collection.
+        get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_inner_product() -> None:
+    """Round-trip one generation using inner-product (IP) similarity.
+
+    Stores a single generation under the prompt "foo" and expects a lookup
+    with the prompt "bar" to return it. The cache is cleared afterwards
+    whether or not the assertion passes.
+    """
+    set_llm_cache(
+        AzureCosmosDBSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            cosmosdb_client=None,
+            embedding=FakeEmbeddings(),
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=CosmosDBSimilarityType.IP,
+            kind=kind,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+        )
+    )
+
+    llm = FakeLLM()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted(params.items()))
+
+    try:
+        get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+
+        # foo and bar will have the same embedding produced by FakeEmbeddings
+        cache_output = get_llm_cache().lookup("bar", llm_string)
+        assert cache_output == [Generation(text="fizz")]
+    finally:
+        # Clear the cache even on failure so a stale entry cannot leak into
+        # the other tests, which all share the same database and collection.
+        get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_multi() -> None:
+    """Round-trip two generations through the cache using the module defaults.
+
+    Stores two generations under the prompt "foo" and expects a lookup with
+    the prompt "bar" to return both, in order. The cache is cleared
+    afterwards whether or not the assertion passes.
+    """
+    set_llm_cache(
+        AzureCosmosDBSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            cosmosdb_client=None,
+            embedding=FakeEmbeddings(),
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=similarity_algorithm,
+            kind=kind,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+        )
+    )
+
+    llm = FakeLLM()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted(params.items()))
+
+    try:
+        get_llm_cache().update(
+            "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
+        )
+
+        # foo and bar will have the same embedding produced by FakeEmbeddings
+        cache_output = get_llm_cache().lookup("bar", llm_string)
+        assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
+    finally:
+        # Clear the cache even on failure so a stale entry cannot leak into
+        # the other tests, which all share the same database and collection.
+        get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_multi_inner_product() -> None:
+    """Round-trip two generations using inner-product (IP) similarity.
+
+    Stores two generations under the prompt "foo" and expects a lookup with
+    the prompt "bar" to return both, in order. The cache is cleared
+    afterwards whether or not the assertion passes.
+    """
+    set_llm_cache(
+        AzureCosmosDBSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            cosmosdb_client=None,
+            embedding=FakeEmbeddings(),
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=CosmosDBSimilarityType.IP,
+            kind=kind,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+        )
+    )
+
+    llm = FakeLLM()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted(params.items()))
+
+    try:
+        get_llm_cache().update(
+            "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
+        )
+
+        # foo and bar will have the same embedding produced by FakeEmbeddings
+        cache_output = get_llm_cache().lookup("bar", llm_string)
+        assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
+    finally:
+        # Clear the cache even on failure so a stale entry cannot leak into
+        # the other tests, which all share the same database and collection.
+        get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_hnsw() -> None:
+    """Round-trip one generation using the VECTOR_HNSW search type.
+
+    Stores a single generation under the prompt "foo" and expects a lookup
+    with the prompt "bar" to return it. The cache is cleared afterwards
+    whether or not the assertion passes.
+    """
+    set_llm_cache(
+        AzureCosmosDBSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            cosmosdb_client=None,
+            embedding=FakeEmbeddings(),
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=similarity_algorithm,
+            kind=CosmosDBVectorSearchType.VECTOR_HNSW,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+        )
+    )
+
+    llm = FakeLLM()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted(params.items()))
+
+    try:
+        get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+
+        # foo and bar will have the same embedding produced by FakeEmbeddings
+        cache_output = get_llm_cache().lookup("bar", llm_string)
+        assert cache_output == [Generation(text="fizz")]
+    finally:
+        # Clear the cache even on failure so a stale entry cannot leak into
+        # the other tests, which all share the same database and collection.
+        get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_inner_product_hnsw() -> None:
+    """Round-trip one generation using VECTOR_HNSW with IP similarity.
+
+    Stores a single generation under the prompt "foo" and expects a lookup
+    with the prompt "bar" to return it. The cache is cleared afterwards
+    whether or not the assertion passes.
+    """
+    set_llm_cache(
+        AzureCosmosDBSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            cosmosdb_client=None,
+            embedding=FakeEmbeddings(),
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=CosmosDBSimilarityType.IP,
+            kind=CosmosDBVectorSearchType.VECTOR_HNSW,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+        )
+    )
+
+    llm = FakeLLM()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted(params.items()))
+
+    try:
+        get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+
+        # foo and bar will have the same embedding produced by FakeEmbeddings
+        cache_output = get_llm_cache().lookup("bar", llm_string)
+        assert cache_output == [Generation(text="fizz")]
+    finally:
+        # Clear the cache even on failure so a stale entry cannot leak into
+        # the other tests, which all share the same database and collection.
+        get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_multi_hnsw() -> None:
+    """Round-trip two generations using the VECTOR_HNSW search type.
+
+    Stores two generations under the prompt "foo" and expects a lookup with
+    the prompt "bar" to return both, in order. The cache is cleared
+    afterwards whether or not the assertion passes.
+    """
+    set_llm_cache(
+        AzureCosmosDBSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            cosmosdb_client=None,
+            embedding=FakeEmbeddings(),
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=similarity_algorithm,
+            kind=CosmosDBVectorSearchType.VECTOR_HNSW,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+        )
+    )
+
+    llm = FakeLLM()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted(params.items()))
+
+    try:
+        get_llm_cache().update(
+            "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
+        )
+
+        # foo and bar will have the same embedding produced by FakeEmbeddings
+        cache_output = get_llm_cache().lookup("bar", llm_string)
+        assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
+    finally:
+        # Clear the cache even on failure so a stale entry cannot leak into
+        # the other tests, which all share the same database and collection.
+        get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_multi_inner_product_hnsw() -> None:
+    """Round-trip two generations using VECTOR_HNSW with IP similarity.
+
+    Stores two generations under the prompt "foo" and expects a lookup with
+    the prompt "bar" to return both, in order. The cache is cleared
+    afterwards whether or not the assertion passes.
+    """
+    set_llm_cache(
+        AzureCosmosDBSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            cosmosdb_client=None,
+            embedding=FakeEmbeddings(),
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=CosmosDBSimilarityType.IP,
+            kind=CosmosDBVectorSearchType.VECTOR_HNSW,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+        )
+    )
+
+    llm = FakeLLM()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted(params.items()))
+
+    try:
+        get_llm_cache().update(
+            "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
+        )
+
+        # foo and bar will have the same embedding produced by FakeEmbeddings
+        cache_output = get_llm_cache().lookup("bar", llm_string)
+        assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
+    finally:
+        # Clear the cache even on failure so a stale entry cannot leak into
+        # the other tests, which all share the same database and collection.
+        get_llm_cache().clear(llm_string=llm_string)