cosmosdbnosql: Added Cosmos DB NoSQL Semantic Cache Integration with tests and jupyter notebook (#24424)

* Added Cosmos DB NoSQL Semantic Cache integration with tests and a Jupyter notebook

---------

Co-authored-by: Aayush Kataria <aayushkataria3011@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
gsa9989 2024-12-16 21:57:05 -05:00 committed by GitHub
parent 27a9056725
commit cdf6202156
6 changed files with 495 additions and 81 deletions
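For quick reference, a minimal sketch of wiring the new semantic cache into an LLM run, assembled from the notebook changes below; the endpoint, key, and policy values are placeholders mirroring the notebook's example cell:

from azure.cosmos import CosmosClient, PartitionKey
from langchain.globals import set_llm_cache
from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache
from langchain_openai import OpenAIEmbeddings

# Placeholder endpoint and key; substitute your Cosmos DB account values.
cosmos_client = CosmosClient("COSMOS_DB_URI", "COSMOS_DB_KEY")

set_llm_cache(
    AzureCosmosDBNoSqlSemanticCache(
        cosmos_client=cosmos_client,
        embedding=OpenAIEmbeddings(),
        # Embedding and indexing policies as in the notebook cell below.
        vector_embedding_policy={
            "vectorEmbeddings": [
                {
                    "path": "/embedding",
                    "dataType": "float32",
                    "dimensions": 1536,
                    "distanceFunction": "cosine",
                }
            ]
        },
        indexing_policy={
            "indexingMode": "consistent",
            "includedPaths": [{"path": "/*"}],
            "excludedPaths": [{"path": '/"_etag"/?'}],
            "vectorIndexes": [{"path": "/embedding", "type": "diskANN"}],
        },
        cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
        cosmos_database_properties={},
    )
)

After this, repeated or semantically similar llm.invoke() calls are served from the cache, as the timing cells in the notebook diff demonstrate.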


@ -14,8 +14,8 @@
},
{
"cell_type": "code",
"execution_count": 2,
"id": "88486f6f",
"execution_count": null,
"id": "f938e881",
"metadata": {},
"outputs": [],
"source": [
@ -30,12 +30,12 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "10ad9224",
"metadata": {
"ExecuteTime": {
"end_time": "2024-04-12T02:05:57.319706Z",
"start_time": "2024-04-12T02:05:57.303868Z"
"end_time": "2024-12-06T00:54:06.474593Z",
"start_time": "2024-12-06T00:53:58.727138Z"
}
},
"outputs": [],
@ -1820,7 +1820,7 @@
},
{
"cell_type": "code",
"execution_count": 83,
"execution_count": null,
"id": "bc1570a2a77b58c8",
"metadata": {
"ExecuteTime": {
@ -1848,12 +1848,155 @@
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# The second time it is, so it goes faster\n",
"llm.invoke(\"Tell me a joke\")"
]
},
{
"cell_type": "markdown",
"id": "235ff73bf7143f13",
"metadata": {},
"source": [
"## Azure CosmosDB NoSql Semantic Cache\n",
"\n",
"You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41fea5aa7b2153ca",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-06T00:55:38.648972Z",
"start_time": "2024-12-06T00:55:38.290541Z"
}
},
"outputs": [],
"source": [
"from typing import Any, Dict\n",
"\n",
"from azure.cosmos import CosmosClient, PartitionKey\n",
"from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"HOST = \"COSMOS_DB_URI\"\n",
"KEY = \"COSMOS_DB_KEY\"\n",
"\n",
"cosmos_client = CosmosClient(HOST, KEY)\n",
"\n",
"\n",
"def get_vector_indexing_policy() -> dict:\n",
" return {\n",
" \"indexingMode\": \"consistent\",\n",
" \"includedPaths\": [{\"path\": \"/*\"}],\n",
" \"excludedPaths\": [{\"path\": '/\"_etag\"/?'}],\n",
" \"vectorIndexes\": [{\"path\": \"/embedding\", \"type\": \"diskANN\"}],\n",
" }\n",
"\n",
"\n",
"def get_vector_embedding_policy() -> dict:\n",
" return {\n",
" \"vectorEmbeddings\": [\n",
" {\n",
" \"path\": \"/embedding\",\n",
" \"dataType\": \"float32\",\n",
" \"dimensions\": 1536,\n",
" \"distanceFunction\": \"cosine\",\n",
" }\n",
" ]\n",
" }\n",
"\n",
"\n",
"cosmos_container_properties_test = {\"partition_key\": PartitionKey(path=\"/id\")}\n",
"cosmos_database_properties_test: Dict[str, Any] = {}\n",
"\n",
"set_llm_cache(\n",
" AzureCosmosDBNoSqlSemanticCache(\n",
" cosmos_client=cosmos_client,\n",
" embedding=OpenAIEmbeddings(),\n",
" vector_embedding_policy=get_vector_embedding_policy(),\n",
" indexing_policy=get_vector_indexing_policy(),\n",
" cosmos_container_properties=cosmos_container_properties_test,\n",
" cosmos_database_properties=cosmos_database_properties_test,\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1e1cd93819921bf6",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-06T00:55:44.513080Z",
"start_time": "2024-12-06T00:55:41.353843Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 374 ms, sys: 34.2 ms, total: 408 ms\n",
"Wall time: 3.15 s\n"
]
},
{
"data": {
"text/plain": [
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was two-tired!\""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# The first time, it is not yet in cache, so it should take longer\n",
"llm.invoke(\"Tell me a joke\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "576ce24c1244812a",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-06T00:55:50.925865Z",
"start_time": "2024-12-06T00:55:50.548520Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 17.7 ms, sys: 2.88 ms, total: 20.6 ms\n",
"Wall time: 373 ms\n"
]
},
{
"data": {
"text/plain": [
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was two-tired!\""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# The second time it is, so it goes faster\n",
"llm.invoke(\"Tell me a joke\")"
]
},
{
"cell_type": "markdown",
"id": "306ff47b",


@ -80,7 +80,10 @@ from langchain_community.utilities.astradb import (
from langchain_community.utilities.astradb import (
_AstraDBCollectionEnvironment,
)
from langchain_community.vectorstores import AzureCosmosDBVectorSearch
from langchain_community.vectorstores import (
AzureCosmosDBNoSqlVectorSearch,
AzureCosmosDBVectorSearch,
)
from langchain_community.vectorstores import (
OpenSearchVectorSearch as OpenSearchVectorStore,
)
@ -93,6 +96,7 @@ if TYPE_CHECKING:
import momento
import pymemcache
from astrapy.db import AstraDB, AsyncAstraDB
from azure.cosmos.cosmos_client import CosmosClient
from cassandra.cluster import Session as CassandraSession
@ -2103,7 +2107,7 @@ class AzureCosmosDBSemanticCache(BaseCache):
ef_construction: int = 64,
ef_search: int = 40,
score_threshold: Optional[float] = None,
application_name: str = "LANGCHAIN_CACHING_PYTHON",
application_name: str = "LangChain-CDBMongoVCore-SemanticCache-Python",
):
"""
Args:
@ -2268,7 +2272,6 @@ class AzureCosmosDBSemanticCache(BaseCache):
index_name = self._index_name(kwargs["llm_string"])
if index_name in self._cache_dict:
self._cache_dict[index_name].get_collection().delete_many({})
# self._cache_dict[index_name].clear_collection()
@staticmethod
def _validate_enum_value(value: Any, enum_type: Type[Enum]) -> None:
@ -2276,6 +2279,111 @@ class AzureCosmosDBSemanticCache(BaseCache):
raise ValueError(f"Invalid enum value: {value}. Expected {enum_type}.")
class AzureCosmosDBNoSqlSemanticCache(BaseCache):
"""Cache that uses Cosmos DB NoSQL backend"""
def __init__(
self,
embedding: Embeddings,
cosmos_client: CosmosClient,
database_name: str = "CosmosNoSqlCacheDB",
container_name: str = "CosmosNoSqlCacheContainer",
*,
vector_embedding_policy: Dict[str, Any],
indexing_policy: Dict[str, Any],
cosmos_container_properties: Dict[str, Any],
cosmos_database_properties: Dict[str, Any],
create_container: bool = True,
):
self.cosmos_client = cosmos_client
self.database_name = database_name
self.container_name = container_name
self.embedding = embedding
self.vector_embedding_policy = vector_embedding_policy
self.indexing_policy = indexing_policy
self.cosmos_container_properties = cosmos_container_properties
self.cosmos_database_properties = cosmos_database_properties
self.create_container = create_container
self._cache_dict: Dict[str, AzureCosmosDBNoSqlVectorSearch] = {}
def _cache_name(self, llm_string: str) -> str:
hashed_index = _hash(llm_string)
return f"cache:{hashed_index}"
def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBNoSqlVectorSearch:
cache_name = self._cache_name(llm_string)
# return vectorstore client for the specific llm string
if cache_name in self._cache_dict:
return self._cache_dict[cache_name]
# create new vectorstore client to create the cache
if self.cosmos_client:
self._cache_dict[cache_name] = AzureCosmosDBNoSqlVectorSearch(
cosmos_client=self.cosmos_client,
embedding=self.embedding,
vector_embedding_policy=self.vector_embedding_policy,
indexing_policy=self.indexing_policy,
cosmos_container_properties=self.cosmos_container_properties,
cosmos_database_properties=self.cosmos_database_properties,
database_name=self.database_name,
container_name=self.container_name,
create_container=self.create_container,
)
return self._cache_dict[cache_name]
def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
"""Look up based on prompt."""
llm_cache = self._get_llm_cache(llm_string)
generations: List = []
# Look up semantically similar prompts via vector similarity search
results = llm_cache.similarity_search(
query=prompt,
k=1,
)
if results:
for document in results:
try:
generations.extend(loads(document.metadata["return_val"]))
except Exception:
logger.warning(
"Retrieving a cache value that could not be deserialized "
"properly. This is likely due to the cache being in an "
"older format. Please recreate your cache to avoid this "
"error."
)
generations.extend(
_load_generations_from_json(document.metadata["return_val"])
)
return generations if generations else None
def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
"""Update cache based on prompt and llm_string."""
for gen in return_val:
if not isinstance(gen, Generation):
raise ValueError(
"CosmosDBNoSqlSemanticCache only supports caching of "
f"normal LLM generations, got {type(gen)}"
)
llm_cache = self._get_llm_cache(llm_string)
metadata = {
"llm_string": llm_string,
"prompt": prompt,
"return_val": dumps([g for g in return_val]),
}
llm_cache.add_texts(texts=[prompt], metadatas=[metadata])
def clear(self, **kwargs: Any) -> None:
"""Clear semantic cache for a given llm_string."""
cache_name = self._cache_name(llm_string=kwargs["llm_string"])
if cache_name in self._cache_dict:
container = self._cache_dict[cache_name].get_container()
for item in container.read_all_items():
container.delete_item(item)
class OpenSearchSemanticCache(BaseCache):
"""Cache that uses OpenSearch vector store backend"""


@ -82,7 +82,7 @@ class AzureCosmosDBVectorSearch(VectorStore):
index_name: str = "vectorSearchIndex",
text_key: str = "textContent",
embedding_key: str = "vectorContent",
application_name: str = "LANGCHAIN_PYTHON",
application_name: str = "LangChain-CDBMongoVCore-VectorStore-Python",
):
"""Constructor for AzureCosmosDBVectorSearch
@ -121,7 +121,7 @@ class AzureCosmosDBVectorSearch(VectorStore):
connection_string: str,
namespace: str,
embedding: Embeddings,
application_name: str = "LANGCHAIN_PYTHON",
application_name: str = "LangChain-CDBMongoVCore-VectorStore-Python",
**kwargs: Any,
) -> AzureCosmosDBVectorSearch:
"""Creates an Instance of AzureCosmosDBVectorSearch


@ -14,7 +14,7 @@ from pydantic import BaseModel, Field
from langchain_community.vectorstores.utils import maximal_marginal_relevance
if TYPE_CHECKING:
from azure.cosmos import CosmosClient
from azure.cosmos import ContainerProxy, CosmosClient
from azure.identity import DefaultAzureCredential
USER_AGENT = ("LangChain-CDBNoSql-VectorStore-Python",)
@ -859,3 +859,6 @@ class AzureCosmosDBNoSqlVectorSearch(VectorStore):
"$full_text_contains_any": "FullTextContainsAny",
}
return operator_map
def get_container(self) -> ContainerProxy:
"""Return the underlying Cosmos DB container client."""
return self._container


@ -0,0 +1,227 @@
"""Test` Azure CosmosDB NoSql cache functionality."""
from typing import Any, Dict
import pytest
from langchain.globals import get_llm_cache, set_llm_cache
from langchain_core.outputs import Generation
from langchain_community.cache import AzureCosmosDBNoSqlSemanticCache
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
from tests.unit_tests.llms.fake_llm import FakeLLM
HOST = "COSMOS_DB_URI"
KEY = "COSMOS_DB_KEY"
@pytest.fixture()
def cosmos_client() -> Any:
from azure.cosmos import CosmosClient
return CosmosClient(HOST, KEY)
# Plain helper (not a pytest fixture) so it can be called at module level below.
def partition_key() -> Any:
from azure.cosmos import PartitionKey
return PartitionKey(path="/id")
# index_type: flat, quantizedFlat, or diskANN
def indexing_policy(index_type: str) -> dict:
return {
"indexingMode": "consistent",
"includedPaths": [{"path": "/*"}],
"excludedPaths": [{"path": '/"_etag"/?'}],
"vectorIndexes": [{"path": "/embedding", "type": index_type}],
}
# distance_function: cosine, euclidean, or dotProduct
def vector_embedding_policy(distance_function: str) -> dict:
return {
"vectorEmbeddings": [
{
"path": "/embedding",
"dataType": "float32",
"distanceFunction": distance_function,
"dimensions": 1536,
}
]
}
cosmos_container_properties_test = {"partition_key": partition_key()}
cosmos_database_properties_test: Dict[str, Any] = {}
def test_azure_cosmos_db_nosql_semantic_cache_cosine_quantizedflat(
cosmos_client: Any,
) -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=cosmos_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("cosine"),
indexing_policy=indexing_policy("quantizedFlat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
def test_azure_cosmos_db_nosql_semantic_cache_cosine_flat(
cosmos_client: Any,
) -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=cosmos_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("cosine"),
indexing_policy=indexing_policy("flat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_quantizedflat(
cosmos_client: Any,
) -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=cosmos_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("dotProduct"),
indexing_policy=indexing_policy("quantizedFlat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update(
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
)
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
def test_azure_cosmos_db_nosql_semantic_cache_dotproduct_flat(
cosmos_client: Any,
) -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=cosmos_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("dotProduct"),
indexing_policy=indexing_policy("flat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update(
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
)
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
def test_azure_cosmos_db_nosql_semantic_cache_euclidean_quantizedflat(
cosmos_client: Any,
) -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=cosmos_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("euclidean"),
indexing_policy=indexing_policy("quantizedFlat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)
def test_azure_cosmos_db_nosql_semantic_cache_euclidean_flat(
cosmos_client: Any,
) -> None:
set_llm_cache(
AzureCosmosDBNoSqlSemanticCache(
cosmos_client=cosmos_client,
embedding=FakeEmbeddings(),
vector_embedding_policy=vector_embedding_policy("euclidean"),
indexing_policy=indexing_policy("flat"),
cosmos_container_properties=cosmos_container_properties_test,
cosmos_database_properties=cosmos_database_properties_test,
)
)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
# foo and bar will have the same embedding produced by FakeEmbeddings
cache_output = get_llm_cache().lookup("bar", llm_string)
assert cache_output == [Generation(text="fizz")]
# clear the cache
get_llm_cache().clear(llm_string=llm_string)


@ -45,14 +45,6 @@ def partition_key() -> Any:
return PartitionKey(path="/id")
@pytest.fixture()
def azure_openai_embeddings() -> Any:
openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
deployment=model_deployment, model=model_name, chunk_size=1
)
return openai_embeddings
def safe_delete_database(cosmos_client: Any) -> None:
cosmos_client.delete_database(database_name)
@ -101,7 +93,7 @@ class TestAzureCosmosDBNoSqlVectorSearch:
store = AzureCosmosDBNoSqlVectorSearch.from_documents(
documents,
azure_openai_embeddings,
embedding=azure_openai_embeddings,
cosmos_client=cosmos_client,
database_name=database_name,
container_name=container_name,
@ -175,7 +167,7 @@ class TestAzureCosmosDBNoSqlVectorSearch:
store = AzureCosmosDBNoSqlVectorSearch.from_documents(
documents,
azure_openai_embeddings,
embedding=azure_openai_embeddings,
cosmos_client=cosmos_client,
database_name=database_name,
container_name=container_name,
@ -195,11 +187,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
assert "Border Collies" in output[0].page_content
assert output[0].metadata["a"] == 1
# pre_filter = {
# "conditions": [
# {"property": "metadata.a", "operator": "$eq", "value": 1},
# ],
# }
pre_filter = PreFilter(
conditions=[
Condition(property="metadata.a", operator="$eq", value=1),
@ -213,11 +200,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
assert "Border Collies" in output[0].page_content
assert output[0].metadata["a"] == 1
# pre_filter = {
# "conditions": [
# {"property": "metadata.a", "operator": "$eq", "value": 1},
# ],
# }
pre_filter = PreFilter(
conditions=[
Condition(property="metadata.a", operator="$eq", value=1),
@ -262,15 +244,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
sleep(480) # waits for Cosmos DB to save contents to the collection
# Full text search contains any
# pre_filter = {
# "conditions": [
# {
# "property": "text",
# "operator": "$full_text_contains_any",
# "value": "intelligent herders",
# },
# ],
# }
pre_filter = PreFilter(
conditions=[
Condition(
@ -292,15 +265,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
assert "Border Collies" in output[0].page_content
# Full text search contains all
# pre_filter = {
# "conditions": [
# {
# "property": "text",
# "operator": "$full_text_contains_all",
# "value": "intelligent herders",
# },
# ],
# }
pre_filter = PreFilter(
conditions=[
Condition(
@ -332,11 +296,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
assert "Standard Poodles" in output[0].page_content
# Full text search BM25 ranking with filtering
# pre_filter = {
# "conditions": [
# {"property": "metadata.a", "operator": "$eq", "value": 1},
# ],
# }
pre_filter = PreFilter(
conditions=[
Condition(property="metadata.a", operator="$eq", value=1),
@ -363,11 +322,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
assert "Border Collies" in output[0].page_content
# Hybrid search RRF ranking with filtering
# pre_filter = {
# "conditions": [
# {"property": "metadata.a", "operator": "$eq", "value": 1},
# ],
# }
pre_filter = PreFilter(
conditions=[
Condition(property="metadata.a", operator="$eq", value=1),
@ -385,16 +339,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
assert "Border Collies" in output[0].page_content
# Full text search BM25 ranking with full text filtering
# pre_filter = {
# "conditions": [
# {
# "property": "text",
# "operator": "$full_text_contains",
# "value": "energetic",
# },
# ]
# }
pre_filter = PreFilter(
conditions=[
Condition(
@ -414,17 +358,6 @@ class TestAzureCosmosDBNoSqlVectorSearch:
assert "Border Collies" in output[0].page_content
# Full text search BM25 ranking with full text filtering
# pre_filter = {
# "conditions": [
# {
# "property": "text",
# "operator": "$full_text_contains",
# "value": "energetic",
# },
# {"property": "metadata.a", "operator": "$eq", "value": 2},
# ],
# "logical_operator": "$and",
# }
pre_filter = PreFilter(
conditions=[
Condition(