community[minor]: Adding Azure Cosmos Mongo vCore Vector DB Cache (#16856)

Description:

This pull request introduces several enhancements for Azure Cosmos
Vector DB, primarily focused on improving caching and search
capabilities using Azure Cosmos MongoDB vCore Vector DB. Here's a
summary of the changes:

- **AzureCosmosDBSemanticCache**: Added a new cache implementation
called AzureCosmosDBSemanticCache, which utilizes Azure Cosmos MongoDB
vCore Vector DB for efficient caching of semantic data. Added
comprehensive test cases for AzureCosmosDBSemanticCache to ensure its
correctness and robustness. These tests cover various scenarios and edge
cases to validate the cache's behavior.
- **HNSW Vector Search**: Added HNSW vector search functionality in the
CosmosDB Vector Search module. This enhancement enables more efficient
and accurate vector searches by utilizing the HNSW (Hierarchical
Navigable Small World) algorithm. Added corresponding test cases to
validate the HNSW vector search functionality in both
AzureCosmosDBSemanticCache and AzureCosmosDBVectorSearch. These tests
ensure the correctness and performance of the HNSW search algorithm.
- **LLM Caching Notebook** - The notebook now includes a comprehensive
example showcasing the usage of the AzureCosmosDBSemanticCache. This
example highlights how the cache can be employed to efficiently store
and retrieve semantic data. Additionally, the example provides default
values for all parameters used within the AzureCosmosDBSemanticCache,
ensuring clarity and ease of understanding for users who are new to the
cache implementation.
 
 @hwchase17,@baskaryan, @eyurtsev,
This commit is contained in:
Aayush Kataria
2024-03-03 14:04:15 -08:00
committed by GitHub
parent db47b5deee
commit 7c2f3f6f95
6 changed files with 1507 additions and 126 deletions

View File

@@ -0,0 +1,350 @@
"""Test Azure CosmosDB cache functionality.
Required to run this test:
- a recent 'pymongo' Python package available
- an Azure CosmosDB Mongo vCore instance
- one environment variable set:
export MONGODB_VCORE_URI="connection string for azure cosmos db mongo vCore"
"""
import os
import uuid
import pytest
from langchain_community.cache import AzureCosmosDBSemanticCache
from langchain_community.vectorstores.azure_cosmos_db import (
CosmosDBSimilarityType,
CosmosDBVectorSearchType,
)
from langchain_core.outputs import Generation
from langchain.globals import get_llm_cache, set_llm_cache
from tests.integration_tests.cache.fake_embeddings import (
FakeEmbeddings,
)
from tests.unit_tests.llms.fake_llm import FakeLLM
INDEX_NAME = "langchain-test-index"
NAMESPACE = "langchain_test_db.langchain_test_collection"
CONNECTION_STRING: str = os.environ.get("MONGODB_VCORE_URI", "")
DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
num_lists = 3
dimensions = 10
similarity_algorithm = CosmosDBSimilarityType.COS
kind = CosmosDBVectorSearchType.VECTOR_IVF
m = 16
ef_construction = 64
ef_search = 40
score_threshold = 0.1
def _has_env_vars() -> bool:
return all(["MONGODB_VCORE_URI" in os.environ])
def random_string() -> str:
return str(uuid.uuid4())
@pytest.mark.requires("pymongo")
@pytest.mark.skipif(
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
)
def test_azure_cosmos_db_semantic_cache() -> None:
set_llm_cache(
AzureCosmosDBSemanticCache(
cosmosdb_connection_string=CONNECTION_STRING,
cosmosdb_client=None,
embedding=FakeEmbeddings(),
database_name=DB_NAME,
collection_name=COLLECTION_NAME,
num_lists=num_lists,
similarity=similarity_algorithm,
kind=kind,
dimensions=dimensions,
m=m,
ef_construction=ef_construction,
ef_search=ef_search,
score_threshold=score_threshold,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
@pytest.mark.requires("pymongo")
@pytest.mark.skipif(
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
)
def test_azure_cosmos_db_semantic_cache_inner_product() -> None:
set_llm_cache(
AzureCosmosDBSemanticCache(
cosmosdb_connection_string=CONNECTION_STRING,
cosmosdb_client=None,
embedding=FakeEmbeddings(),
database_name=DB_NAME,
collection_name=COLLECTION_NAME,
num_lists=num_lists,
similarity=CosmosDBSimilarityType.IP,
kind=kind,
dimensions=dimensions,
m=m,
ef_construction=ef_construction,
ef_search=ef_search,
score_threshold=score_threshold,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
@pytest.mark.requires("pymongo")
@pytest.mark.skipif(
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
)
def test_azure_cosmos_db_semantic_cache_multi() -> None:
set_llm_cache(
AzureCosmosDBSemanticCache(
cosmosdb_connection_string=CONNECTION_STRING,
cosmosdb_client=None,
embedding=FakeEmbeddings(),
database_name=DB_NAME,
collection_name=COLLECTION_NAME,
num_lists=num_lists,
similarity=similarity_algorithm,
kind=kind,
dimensions=dimensions,
m=m,
ef_construction=ef_construction,
ef_search=ef_search,
score_threshold=score_threshold,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update(
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
)
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
@pytest.mark.requires("pymongo")
@pytest.mark.skipif(
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
)
def test_azure_cosmos_db_semantic_cache_multi_inner_product() -> None:
set_llm_cache(
AzureCosmosDBSemanticCache(
cosmosdb_connection_string=CONNECTION_STRING,
cosmosdb_client=None,
embedding=FakeEmbeddings(),
database_name=DB_NAME,
collection_name=COLLECTION_NAME,
num_lists=num_lists,
similarity=CosmosDBSimilarityType.IP,
kind=kind,
dimensions=dimensions,
m=m,
ef_construction=ef_construction,
ef_search=ef_search,
score_threshold=score_threshold,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update(
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
)
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
@pytest.mark.requires("pymongo")
@pytest.mark.skipif(
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
)
def test_azure_cosmos_db_semantic_cache_hnsw() -> None:
set_llm_cache(
AzureCosmosDBSemanticCache(
cosmosdb_connection_string=CONNECTION_STRING,
cosmosdb_client=None,
embedding=FakeEmbeddings(),
database_name=DB_NAME,
collection_name=COLLECTION_NAME,
num_lists=num_lists,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_HNSW,
dimensions=dimensions,
m=m,
ef_construction=ef_construction,
ef_search=ef_search,
score_threshold=score_threshold,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
@pytest.mark.requires("pymongo")
@pytest.mark.skipif(
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
)
def test_azure_cosmos_db_semantic_cache_inner_product_hnsw() -> None:
set_llm_cache(
AzureCosmosDBSemanticCache(
cosmosdb_connection_string=CONNECTION_STRING,
cosmosdb_client=None,
embedding=FakeEmbeddings(),
database_name=DB_NAME,
collection_name=COLLECTION_NAME,
num_lists=num_lists,
similarity=CosmosDBSimilarityType.IP,
kind=CosmosDBVectorSearchType.VECTOR_HNSW,
dimensions=dimensions,
m=m,
ef_construction=ef_construction,
ef_search=ef_search,
score_threshold=score_threshold,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
@pytest.mark.requires("pymongo")
@pytest.mark.skipif(
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
)
def test_azure_cosmos_db_semantic_cache_multi_hnsw() -> None:
set_llm_cache(
AzureCosmosDBSemanticCache(
cosmosdb_connection_string=CONNECTION_STRING,
cosmosdb_client=None,
embedding=FakeEmbeddings(),
database_name=DB_NAME,
collection_name=COLLECTION_NAME,
num_lists=num_lists,
similarity=similarity_algorithm,
kind=CosmosDBVectorSearchType.VECTOR_HNSW,
dimensions=dimensions,
m=m,
ef_construction=ef_construction,
ef_search=ef_search,
score_threshold=score_threshold,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update(
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
)
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
@pytest.mark.requires("pymongo")
@pytest.mark.skipif(
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
)
def test_azure_cosmos_db_semantic_cache_multi_inner_product_hnsw() -> None:
set_llm_cache(
AzureCosmosDBSemanticCache(
cosmosdb_connection_string=CONNECTION_STRING,
cosmosdb_client=None,
embedding=FakeEmbeddings(),
database_name=DB_NAME,
collection_name=COLLECTION_NAME,
num_lists=num_lists,
similarity=CosmosDBSimilarityType.IP,
kind=CosmosDBVectorSearchType.VECTOR_HNSW,
dimensions=dimensions,
m=m,
ef_construction=ef_construction,
ef_search=ef_search,
score_threshold=score_threshold,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update(
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
)
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)