mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-07 05:52:15 +00:00
cosmosdbnosql: Added Cosmos DB NoSQL Semantic Cache Integration with tests and jupyter notebook (#24424)
* Added Cosmos DB NoSQL Semantic Cache Integration with tests and jupyter notebook --------- Co-authored-by: Aayush Kataria <aayushkataria3011@gmail.com> Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
227
libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py
vendored
Normal file
227
libs/community/tests/integration_tests/cache/test_azure_cosmosdbnosql_cache.py
vendored
Normal file
@@ -0,0 +1,227 @@
|
||||
"""Test Azure CosmosDB NoSql cache functionality."""
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
from langchain.globals import get_llm_cache, set_llm_cache
|
||||
from langchain_core.outputs import Generation
|
||||
|
||||
from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
||||
from tests.unit_tests.llms.fake_llm import FakeLLM
|
||||
|
||||
# Endpoint and key handed to CosmosClient by the cosmos_client fixture.
# NOTE(review): these look like placeholders rather than real credentials;
# presumably they are replaced before running against a live account - confirm.
HOST = "COSMOS_DB_URI"
KEY = "COSMOS_DB_KEY"
|
||||
|
||||
|
||||
@pytest.fixture()
def cosmos_client() -> Any:
    """Provide a CosmosClient built from the module-level HOST and KEY."""
    # The import lives inside the fixture body, so azure-cosmos is only
    # imported when the fixture is actually executed.
    from azure.cosmos import CosmosClient

    endpoint, credential = HOST, KEY
    return CosmosClient(endpoint, credential)
|
||||
|
||||
|
||||
@pytest.fixture()
def partition_key() -> Any:
    """Provide a PartitionKey definition whose path is ``/id``."""
    # The import lives inside the fixture body, so azure-cosmos is only
    # imported when the fixture is actually executed.
    from azure.cosmos import PartitionKey

    key_definition = PartitionKey(path="/id")
    return key_definition
|
||||
|
||||
|
||||
# The tests in this file pass "flat" or "quantizedFlat" as index_type
# (cosine, euclidean and dotProduct are distance functions and go to
# vector_embedding_policy instead).
def indexing_policy(index_type: str) -> dict:
    """Build the indexing policy dict with a vector index of ``index_type``.

    The policy uses consistent indexing mode, includes every path, excludes
    the ``_etag`` path and declares one vector index on ``/embedding``.
    """
    vector_index = {"path": "/embedding", "type": index_type}

    policy: dict = {"indexingMode": "consistent"}
    policy["includedPaths"] = [{"path": "/*"}]
    policy["excludedPaths"] = [{"path": '/"_etag"/?'}]
    policy["vectorIndexes"] = [vector_index]
    return policy
|
||||
|
||||
|
||||
def vector_embedding_policy(distance_function: str, dimensions: int = 1536) -> dict:
    """Build the vector embedding policy dict for the ``/embedding`` path.

    Args:
        distance_function: Value stored under ``distanceFunction``. The tests
            in this file pass "cosine", "dotProduct" and "euclidean".
        dimensions: Value stored under ``dimensions``. Defaults to 1536, the
            value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        A dict with a single ``float32`` entry under ``vectorEmbeddings``.
    """
    return {
        "vectorEmbeddings": [
            {
                "path": "/embedding",
                "dataType": "float32",
                "distanceFunction": distance_function,
                "dimensions": dimensions,
            }
        ]
    }
|
||||
|
||||
|
||||
# Shared container and database settings for the cache tests in this file.
# NOTE(review): `partition_key` here is the fixture function defined above, not
# the PartitionKey object it returns - confirm this is what the cache expects.
cosmos_container_properties_test = {"partition_key": partition_key}
cosmos_database_properties_test: Dict[str, Any] = {}
|
||||
|
||||
|
||||
def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat(
    cosmos_client: Any,
    partition_key: Any,
) -> None:
    """Store one generation and read it back (cosine distance, quantizedFlat index).

    The entry is written under prompt "foo" and looked up with prompt "bar".
    """
    set_llm_cache(
        AzureCosmosDBNoSqlSemanticCache(
            cosmos_client=cosmos_client,
            embedding=FakeEmbeddings(),
            vector_embedding_policy=vector_embedding_policy("cosine"),
            indexing_policy=indexing_policy("quantizedFlat"),
            # Pass the value produced by the partition_key fixture; the
            # module-level dict holds the fixture function itself.
            cosmos_container_properties={"partition_key": partition_key},
            cosmos_database_properties=cosmos_database_properties_test,
        )
    )

    llm = FakeLLM()
    params = llm.dict()
    params["stop"] = None
    llm_string = str(sorted(params.items()))

    try:
        get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])

        # foo and bar will have the same embedding produced by FakeEmbeddings
        cache_output = get_llm_cache().lookup("bar", llm_string)
        assert cache_output == [Generation(text="fizz")]
    finally:
        # run the clear step even when the assertion above fails
        get_llm_cache().clear(llm_string=llm_string)
|
||||
|
||||
|
||||
def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat(
    cosmos_client: Any,
    partition_key: Any,
) -> None:
    """Store one generation and read it back (cosine distance, flat index).

    The entry is written under prompt "foo" and looked up with prompt "bar".
    """
    set_llm_cache(
        AzureCosmosDBNoSqlSemanticCache(
            cosmos_client=cosmos_client,
            embedding=FakeEmbeddings(),
            vector_embedding_policy=vector_embedding_policy("cosine"),
            indexing_policy=indexing_policy("flat"),
            # Pass the value produced by the partition_key fixture; the
            # module-level dict holds the fixture function itself.
            cosmos_container_properties={"partition_key": partition_key},
            cosmos_database_properties=cosmos_database_properties_test,
        )
    )

    llm = FakeLLM()
    params = llm.dict()
    params["stop"] = None
    llm_string = str(sorted(params.items()))

    try:
        get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])

        # foo and bar will have the same embedding produced by FakeEmbeddings
        cache_output = get_llm_cache().lookup("bar", llm_string)
        assert cache_output == [Generation(text="fizz")]
    finally:
        # run the clear step even when the assertion above fails
        get_llm_cache().clear(llm_string=llm_string)
|
||||
|
||||
|
||||
def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat(
    cosmos_client: Any,
    partition_key: Any,
) -> None:
    """Store two generations and read them back (dotProduct, quantizedFlat index).

    The entry is written under prompt "foo" and looked up with prompt "bar".
    """
    set_llm_cache(
        AzureCosmosDBNoSqlSemanticCache(
            cosmos_client=cosmos_client,
            embedding=FakeEmbeddings(),
            vector_embedding_policy=vector_embedding_policy("dotProduct"),
            indexing_policy=indexing_policy("quantizedFlat"),
            # Pass the value produced by the partition_key fixture; the
            # module-level dict holds the fixture function itself.
            cosmos_container_properties={"partition_key": partition_key},
            cosmos_database_properties=cosmos_database_properties_test,
        )
    )

    llm = FakeLLM()
    params = llm.dict()
    params["stop"] = None
    llm_string = str(sorted(params.items()))

    try:
        get_llm_cache().update(
            "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
        )

        # foo and bar will have the same embedding produced by FakeEmbeddings
        cache_output = get_llm_cache().lookup("bar", llm_string)
        assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
    finally:
        # run the clear step even when the assertion above fails
        get_llm_cache().clear(llm_string=llm_string)
|
||||
|
||||
|
||||
def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat(
    cosmos_client: Any,
    partition_key: Any,
) -> None:
    """Store two generations and read them back (dotProduct distance, flat index).

    The entry is written under prompt "foo" and looked up with prompt "bar".
    """
    set_llm_cache(
        AzureCosmosDBNoSqlSemanticCache(
            cosmos_client=cosmos_client,
            embedding=FakeEmbeddings(),
            vector_embedding_policy=vector_embedding_policy("dotProduct"),
            indexing_policy=indexing_policy("flat"),
            # Pass the value produced by the partition_key fixture; the
            # module-level dict holds the fixture function itself.
            cosmos_container_properties={"partition_key": partition_key},
            cosmos_database_properties=cosmos_database_properties_test,
        )
    )

    llm = FakeLLM()
    params = llm.dict()
    params["stop"] = None
    llm_string = str(sorted(params.items()))

    try:
        get_llm_cache().update(
            "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
        )

        # foo and bar will have the same embedding produced by FakeEmbeddings
        cache_output = get_llm_cache().lookup("bar", llm_string)
        assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
    finally:
        # run the clear step even when the assertion above fails
        get_llm_cache().clear(llm_string=llm_string)
|
||||
|
||||
|
||||
def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat(
    cosmos_client: Any,
    partition_key: Any,
) -> None:
    """Store one generation and read it back (euclidean, quantizedFlat index).

    The entry is written under prompt "foo" and looked up with prompt "bar".
    """
    set_llm_cache(
        AzureCosmosDBNoSqlSemanticCache(
            cosmos_client=cosmos_client,
            embedding=FakeEmbeddings(),
            vector_embedding_policy=vector_embedding_policy("euclidean"),
            indexing_policy=indexing_policy("quantizedFlat"),
            # Pass the value produced by the partition_key fixture; the
            # module-level dict holds the fixture function itself.
            cosmos_container_properties={"partition_key": partition_key},
            cosmos_database_properties=cosmos_database_properties_test,
        )
    )

    llm = FakeLLM()
    params = llm.dict()
    params["stop"] = None
    llm_string = str(sorted(params.items()))

    try:
        get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])

        # foo and bar will have the same embedding produced by FakeEmbeddings
        cache_output = get_llm_cache().lookup("bar", llm_string)
        assert cache_output == [Generation(text="fizz")]
    finally:
        # run the clear step even when the assertion above fails
        get_llm_cache().clear(llm_string=llm_string)
|
||||
|
||||
|
||||
def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat(
    cosmos_client: Any,
    partition_key: Any,
) -> None:
    """Store one generation and read it back (euclidean distance, flat index).

    The entry is written under prompt "foo" and looked up with prompt "bar".
    """
    set_llm_cache(
        AzureCosmosDBNoSqlSemanticCache(
            cosmos_client=cosmos_client,
            embedding=FakeEmbeddings(),
            vector_embedding_policy=vector_embedding_policy("euclidean"),
            indexing_policy=indexing_policy("flat"),
            # Pass the value produced by the partition_key fixture; the
            # module-level dict holds the fixture function itself.
            cosmos_container_properties={"partition_key": partition_key},
            cosmos_database_properties=cosmos_database_properties_test,
        )
    )

    llm = FakeLLM()
    params = llm.dict()
    params["stop"] = None
    llm_string = str(sorted(params.items()))

    try:
        get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])

        # foo and bar will have the same embedding produced by FakeEmbeddings
        cache_output = get_llm_cache().lookup("bar", llm_string)
        assert cache_output == [Generation(text="fizz")]
    finally:
        # run the clear step even when the assertion above fails
        get_llm_cache().clear(llm_string=llm_string)
|
@@ -45,14 +45,6 @@ def partition_key() -> Any:
|
||||
return PartitionKey(path="/id")
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def azure_openai_embeddings() -> Any:
|
||||
openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
|
||||
deployment=model_deployment, model=model_name, chunk_size=1
|
||||
)
|
||||
return openai_embeddings
|
||||
|
||||
|
||||
def safe_delete_database(cosmos_client: Any) -> None:
|
||||
cosmos_client.delete_database(database_name)
|
||||
|
||||
@@ -101,7 +93,7 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
|
||||
store = AzureCosmosDBNoSqlVectorSearch.from_documents(
|
||||
documents,
|
||||
azure_openai_embeddings,
|
||||
embedding=azure_openai_embeddings,
|
||||
cosmos_client=cosmos_client,
|
||||
database_name=database_name,
|
||||
container_name=container_name,
|
||||
@@ -175,7 +167,7 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
|
||||
store = AzureCosmosDBNoSqlVectorSearch.from_documents(
|
||||
documents,
|
||||
azure_openai_embeddings,
|
||||
embedding=azure_openai_embeddings,
|
||||
cosmos_client=cosmos_client,
|
||||
database_name=database_name,
|
||||
container_name=container_name,
|
||||
@@ -195,11 +187,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
assert "Border Collies" in output[0].page_content
|
||||
assert output[0].metadata["a"] == 1
|
||||
|
||||
# pre_filter = {
|
||||
# "conditions": [
|
||||
# {"property": "metadata.a", "operator": "$eq", "value": 1},
|
||||
# ],
|
||||
# }
|
||||
pre_filter = PreFilter(
|
||||
conditions=[
|
||||
Condition(property="metadata.a", operator="$eq", value=1),
|
||||
@@ -213,11 +200,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
assert "Border Collies" in output[0].page_content
|
||||
assert output[0].metadata["a"] == 1
|
||||
|
||||
# pre_filter = {
|
||||
# "conditions": [
|
||||
# {"property": "metadata.a", "operator": "$eq", "value": 1},
|
||||
# ],
|
||||
# }
|
||||
pre_filter = PreFilter(
|
||||
conditions=[
|
||||
Condition(property="metadata.a", operator="$eq", value=1),
|
||||
@@ -262,15 +244,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
sleep(480) # waits for Cosmos DB to save contents to the collection
|
||||
|
||||
# Full text search contains any
|
||||
# pre_filter = {
|
||||
# "conditions": [
|
||||
# {
|
||||
# "property": "text",
|
||||
# "operator": "$full_text_contains_any",
|
||||
# "value": "intelligent herders",
|
||||
# },
|
||||
# ],
|
||||
# }
|
||||
pre_filter = PreFilter(
|
||||
conditions=[
|
||||
Condition(
|
||||
@@ -292,15 +265,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
assert "Border Collies" in output[0].page_content
|
||||
|
||||
# Full text search contains all
|
||||
# pre_filter = {
|
||||
# "conditions": [
|
||||
# {
|
||||
# "property": "text",
|
||||
# "operator": "$full_text_contains_all",
|
||||
# "value": "intelligent herders",
|
||||
# },
|
||||
# ],
|
||||
# }
|
||||
pre_filter = PreFilter(
|
||||
conditions=[
|
||||
Condition(
|
||||
@@ -332,11 +296,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
assert "Standard Poodles" in output[0].page_content
|
||||
|
||||
# Full text search BM25 ranking with filtering
|
||||
# pre_filter = {
|
||||
# "conditions": [
|
||||
# {"property": "metadata.a", "operator": "$eq", "value": 1},
|
||||
# ],
|
||||
# }
|
||||
pre_filter = PreFilter(
|
||||
conditions=[
|
||||
Condition(property="metadata.a", operator="$eq", value=1),
|
||||
@@ -363,11 +322,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
assert "Border Collies" in output[0].page_content
|
||||
|
||||
# Hybrid search RRF ranking with filtering
|
||||
# pre_filter = {
|
||||
# "conditions": [
|
||||
# {"property": "metadata.a", "operator": "$eq", "value": 1},
|
||||
# ],
|
||||
# }
|
||||
pre_filter = PreFilter(
|
||||
conditions=[
|
||||
Condition(property="metadata.a", operator="$eq", value=1),
|
||||
@@ -385,16 +339,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
assert "Border Collies" in output[0].page_content
|
||||
|
||||
# Full text search BM25 ranking with full text filtering
|
||||
# pre_filter = {
|
||||
# "conditions": [
|
||||
# {
|
||||
# "property": "text",
|
||||
# "operator": "$full_text_contains",
|
||||
# "value": "energetic",
|
||||
# },
|
||||
# ]
|
||||
# }
|
||||
|
||||
pre_filter = PreFilter(
|
||||
conditions=[
|
||||
Condition(
|
||||
@@ -414,17 +358,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
|
||||
assert "Border Collies" in output[0].page_content
|
||||
|
||||
# Full text search BM25 ranking with full text filtering
|
||||
# pre_filter = {
|
||||
# "conditions": [
|
||||
# {
|
||||
# "property": "text",
|
||||
# "operator": "$full_text_contains",
|
||||
# "value": "energetic",
|
||||
# },
|
||||
# {"property": "metadata.a", "operator": "$eq", "value": 2},
|
||||
# ],
|
||||
# "logical_operator": "$and",
|
||||
# }
|
||||
pre_filter = PreFilter(
|
||||
conditions=[
|
||||
Condition(
|
||||
|
Reference in New Issue
Block a user