mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-08 06:23:20 +00:00
community[patch]: Fixing some caching issues for AzureCosmosDBSemanticCache (#18884)
Fixes some issues with AzureCosmosDBSemanticCache:
- Added the entry for "AzureCosmosDBSemanticCache", which was missing in langchain/cache.py.
- Added an application name when creating the MongoClient for AzureCosmosDBVectorSearch, for tracking purposes.

@baskaryan, can you please review this PR? We need this to go in ASAP. These are just small fixes that we found today in our testing.
This commit is contained in:
@@ -12,12 +12,12 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 3,
|
||||||
"id": "10ad9224",
|
"id": "10ad9224",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
"end_time": "2024-02-02T21:34:23.461332Z",
|
"end_time": "2024-03-18T01:01:08.425930Z",
|
||||||
"start_time": "2024-02-02T21:34:23.394461Z"
|
"start_time": "2024-03-18T01:01:08.327196Z"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -41,7 +41,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 11,
|
||||||
"id": "426ff912",
|
"id": "426ff912",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -1356,18 +1356,26 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"id": "40624c26e86b57a4",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Azure Cosmos DB Semantic Cache\n",
|
"## Azure Cosmos DB Semantic Cache\n",
|
||||||
"\n",
|
"\n",
|
||||||
"You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching."
|
"You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching."
|
||||||
],
|
]
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"id": "40624c26e86b57a4"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "4a9d592db01b11b2",
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-18T01:01:32.014750Z",
|
||||||
|
"start_time": "2024-03-18T01:01:31.955991Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.cache import AzureCosmosDBSemanticCache\n",
|
"from langchain.cache import AzureCosmosDBSemanticCache\n",
|
||||||
@@ -1379,11 +1387,11 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Read more about Azure CosmosDB Mongo vCore vector search here https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search\n",
|
"# Read more about Azure CosmosDB Mongo vCore vector search here https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search\n",
|
||||||
"\n",
|
"\n",
|
||||||
"INDEX_NAME = \"langchain-test-index\"\n",
|
|
||||||
"NAMESPACE = \"langchain_test_db.langchain_test_collection\"\n",
|
"NAMESPACE = \"langchain_test_db.langchain_test_collection\"\n",
|
||||||
"CONNECTION_STRING = (\n",
|
"CONNECTION_STRING = (\n",
|
||||||
" \"Please provide your azure cosmos mongo vCore vector db connection string\"\n",
|
" \"Please provide your azure cosmos mongo vCore vector db connection string\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
|
"\n",
|
||||||
"DB_NAME, COLLECTION_NAME = NAMESPACE.split(\".\")\n",
|
"DB_NAME, COLLECTION_NAME = NAMESPACE.split(\".\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Default value for these params\n",
|
"# Default value for these params\n",
|
||||||
@@ -1394,7 +1402,9 @@
|
|||||||
"m = 16\n",
|
"m = 16\n",
|
||||||
"ef_construction = 64\n",
|
"ef_construction = 64\n",
|
||||||
"ef_search = 40\n",
|
"ef_search = 40\n",
|
||||||
"score_threshold = 0.1\n",
|
"score_threshold = 0.9\n",
|
||||||
|
"application_name = \"LANGCHAIN_CACHING_PYTHON\"\n",
|
||||||
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"set_llm_cache(\n",
|
"set_llm_cache(\n",
|
||||||
" AzureCosmosDBSemanticCache(\n",
|
" AzureCosmosDBSemanticCache(\n",
|
||||||
@@ -1411,18 +1421,10 @@
|
|||||||
" ef_construction=ef_construction,\n",
|
" ef_construction=ef_construction,\n",
|
||||||
" ef_search=ef_search,\n",
|
" ef_search=ef_search,\n",
|
||||||
" score_threshold=score_threshold,\n",
|
" score_threshold=score_threshold,\n",
|
||||||
|
" application_name=application_name,\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
")"
|
")"
|
||||||
],
|
]
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"ExecuteTime": {
|
|
||||||
"end_time": "2024-02-02T21:34:49.457001Z",
|
|
||||||
"start_time": "2024-02-02T21:34:49.411293Z"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"id": "4a9d592db01b11b2",
|
|
||||||
"execution_count": 16
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
@@ -1431,15 +1433,15 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"CPU times: user 43.4 ms, sys: 7.23 ms, total: 50.7 ms\n",
|
"CPU times: user 45.6 ms, sys: 19.7 ms, total: 65.3 ms\n",
|
||||||
"Wall time: 1.61 s\n"
|
"Wall time: 2.29 s\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": "\"\\n\\nWhy couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\""
|
"text/plain": "'\\n\\nWhy was the math book sad? Because it had too many problems.'"
|
||||||
},
|
},
|
||||||
"execution_count": 17,
|
"execution_count": 82,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -1452,47 +1454,46 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
"end_time": "2024-02-02T21:34:53.704234Z",
|
"end_time": "2024-03-12T00:12:57.462226Z",
|
||||||
"start_time": "2024-02-02T21:34:52.091096Z"
|
"start_time": "2024-03-12T00:12:55.166201Z"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"id": "8488cf9c97ec7ab",
|
"id": "14ca942820e8140c",
|
||||||
"execution_count": 17
|
"execution_count": 82
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"outputs": [
|
"execution_count": 83,
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"CPU times: user 6.89 ms, sys: 2.24 ms, total: 9.13 ms\n",
|
|
||||||
"Wall time: 337 ms\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": "\"\\n\\nWhy couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\""
|
|
||||||
},
|
|
||||||
"execution_count": 18,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"# The first time, it is not yet in cache, so it should take longer\n",
|
|
||||||
"llm(\"Tell me a joke\")"
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"ExecuteTime": {
|
|
||||||
"end_time": "2024-02-02T21:34:56.004502Z",
|
|
||||||
"start_time": "2024-02-02T21:34:55.650136Z"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"id": "bc1570a2a77b58c8",
|
"id": "bc1570a2a77b58c8",
|
||||||
"execution_count": 18
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-12T00:13:03.652755Z",
|
||||||
|
"start_time": "2024-03-12T00:13:03.159428Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: user 9.61 ms, sys: 3.42 ms, total: 13 ms\n",
|
||||||
|
"Wall time: 474 ms\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "'\\n\\nWhy was the math book sad? Because it had too many problems.'"
|
||||||
|
},
|
||||||
|
"execution_count": 83,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"# The first time, it is not yet in cache, so it should take longer\n",
|
||||||
|
"llm(\"Tell me a joke\")"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
@@ -1743,7 +1744,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.17"
|
"version": "3.11.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
@@ -1879,6 +1879,7 @@ class AzureCosmosDBSemanticCache(BaseCache):
|
|||||||
ef_construction: int = 64,
|
ef_construction: int = 64,
|
||||||
ef_search: int = 40,
|
ef_search: int = 40,
|
||||||
score_threshold: Optional[float] = None,
|
score_threshold: Optional[float] = None,
|
||||||
|
application_name: str = "LANGCHAIN_CACHING_PYTHON",
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
@@ -1920,6 +1921,7 @@ class AzureCosmosDBSemanticCache(BaseCache):
|
|||||||
(40 by default). A higher value provides better
|
(40 by default). A higher value provides better
|
||||||
recall at the cost of speed.
|
recall at the cost of speed.
|
||||||
score_threshold: Maximum score used to filter the vector search documents.
|
score_threshold: Maximum score used to filter the vector search documents.
|
||||||
|
application_name: Application name for the client for tracking and logging
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self._validate_enum_value(similarity, CosmosDBSimilarityType)
|
self._validate_enum_value(similarity, CosmosDBSimilarityType)
|
||||||
@@ -1942,6 +1944,7 @@ class AzureCosmosDBSemanticCache(BaseCache):
|
|||||||
self.ef_search = ef_search
|
self.ef_search = ef_search
|
||||||
self.score_threshold = score_threshold
|
self.score_threshold = score_threshold
|
||||||
self._cache_dict: Dict[str, AzureCosmosDBVectorSearch] = {}
|
self._cache_dict: Dict[str, AzureCosmosDBVectorSearch] = {}
|
||||||
|
self.application_name = application_name
|
||||||
|
|
||||||
def _index_name(self, llm_string: str) -> str:
|
def _index_name(self, llm_string: str) -> str:
|
||||||
hashed_index = _hash(llm_string)
|
hashed_index = _hash(llm_string)
|
||||||
@@ -1972,6 +1975,7 @@ class AzureCosmosDBSemanticCache(BaseCache):
|
|||||||
namespace=namespace,
|
namespace=namespace,
|
||||||
embedding=self.embedding,
|
embedding=self.embedding,
|
||||||
index_name=index_name,
|
index_name=index_name,
|
||||||
|
application_name=self.application_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
# create index for the vectorstore
|
# create index for the vectorstore
|
||||||
|
@@ -119,6 +119,7 @@ class AzureCosmosDBVectorSearch(VectorStore):
|
|||||||
connection_string: str,
|
connection_string: str,
|
||||||
namespace: str,
|
namespace: str,
|
||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
|
application_name: str = "LANGCHAIN_PYTHON",
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> AzureCosmosDBVectorSearch:
|
) -> AzureCosmosDBVectorSearch:
|
||||||
"""Creates an Instance of AzureCosmosDBVectorSearch from a Connection String
|
"""Creates an Instance of AzureCosmosDBVectorSearch from a Connection String
|
||||||
@@ -140,7 +141,8 @@ class AzureCosmosDBVectorSearch(VectorStore):
|
|||||||
"Could not import pymongo, please install it with "
|
"Could not import pymongo, please install it with "
|
||||||
"`pip install pymongo`."
|
"`pip install pymongo`."
|
||||||
)
|
)
|
||||||
client: MongoClient = MongoClient(connection_string)
|
appname = application_name
|
||||||
|
client: MongoClient = MongoClient(connection_string, appname=appname)
|
||||||
db_name, collection_name = namespace.split(".")
|
db_name, collection_name = namespace.split(".")
|
||||||
collection = client[db_name][collection_name]
|
collection = client[db_name][collection_name]
|
||||||
return cls(collection, embedding, **kwargs)
|
return cls(collection, embedding, **kwargs)
|
||||||
|
@@ -35,6 +35,7 @@ m = 16
|
|||||||
ef_construction = 64
|
ef_construction = 64
|
||||||
ef_search = 40
|
ef_search = 40
|
||||||
score_threshold = 0.1
|
score_threshold = 0.1
|
||||||
|
application_name = "LANGCHAIN_PYTHON"
|
||||||
|
|
||||||
|
|
||||||
def prepare_collection() -> Any:
|
def prepare_collection() -> Any:
|
||||||
@@ -108,6 +109,7 @@ class TestAzureCosmosDBVectorSearch:
|
|||||||
azure_openai_embeddings,
|
azure_openai_embeddings,
|
||||||
collection=collection,
|
collection=collection,
|
||||||
index_name=INDEX_NAME,
|
index_name=INDEX_NAME,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
sleep(1) # waits for Cosmos DB to save contents to the collection
|
sleep(1) # waits for Cosmos DB to save contents to the collection
|
||||||
|
|
||||||
@@ -911,6 +913,7 @@ class TestAzureCosmosDBVectorSearch:
|
|||||||
NAMESPACE,
|
NAMESPACE,
|
||||||
azure_openai_embeddings,
|
azure_openai_embeddings,
|
||||||
index_name=INDEX_NAME,
|
index_name=INDEX_NAME,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -926,6 +929,7 @@ class TestAzureCosmosDBVectorSearch:
|
|||||||
NAMESPACE,
|
NAMESPACE,
|
||||||
azure_openai_embeddings,
|
azure_openai_embeddings,
|
||||||
index_name=INDEX_NAME,
|
index_name=INDEX_NAME,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
from langchain_community.cache import (
|
from langchain_community.cache import (
|
||||||
AstraDBCache,
|
AstraDBCache,
|
||||||
AstraDBSemanticCache,
|
AstraDBSemanticCache,
|
||||||
|
AzureCosmosDBSemanticCache,
|
||||||
CassandraCache,
|
CassandraCache,
|
||||||
CassandraSemanticCache,
|
CassandraSemanticCache,
|
||||||
FullLLMCache,
|
FullLLMCache,
|
||||||
@@ -32,4 +33,5 @@ __all__ = [
|
|||||||
"SQLAlchemyMd5Cache",
|
"SQLAlchemyMd5Cache",
|
||||||
"AstraDBCache",
|
"AstraDBCache",
|
||||||
"AstraDBSemanticCache",
|
"AstraDBSemanticCache",
|
||||||
|
"AzureCosmosDBSemanticCache",
|
||||||
]
|
]
|
||||||
|
@@ -36,6 +36,7 @@ m = 16
|
|||||||
ef_construction = 64
|
ef_construction = 64
|
||||||
ef_search = 40
|
ef_search = 40
|
||||||
score_threshold = 0.1
|
score_threshold = 0.1
|
||||||
|
application_name = "LANGCHAIN_CACHING_PYTHON"
|
||||||
|
|
||||||
|
|
||||||
def _has_env_vars() -> bool:
|
def _has_env_vars() -> bool:
|
||||||
@@ -66,6 +67,7 @@ def test_azure_cosmos_db_semantic_cache() -> None:
|
|||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
ef_search=ef_search,
|
ef_search=ef_search,
|
||||||
score_threshold=score_threshold,
|
score_threshold=score_threshold,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -103,6 +105,7 @@ def test_azure_cosmos_db_semantic_cache_inner_product() -> None:
|
|||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
ef_search=ef_search,
|
ef_search=ef_search,
|
||||||
score_threshold=score_threshold,
|
score_threshold=score_threshold,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -140,6 +143,7 @@ def test_azure_cosmos_db_semantic_cache_multi() -> None:
|
|||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
ef_search=ef_search,
|
ef_search=ef_search,
|
||||||
score_threshold=score_threshold,
|
score_threshold=score_threshold,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -179,6 +183,7 @@ def test_azure_cosmos_db_semantic_cache_multi_inner_product() -> None:
|
|||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
ef_search=ef_search,
|
ef_search=ef_search,
|
||||||
score_threshold=score_threshold,
|
score_threshold=score_threshold,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -218,6 +223,7 @@ def test_azure_cosmos_db_semantic_cache_hnsw() -> None:
|
|||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
ef_search=ef_search,
|
ef_search=ef_search,
|
||||||
score_threshold=score_threshold,
|
score_threshold=score_threshold,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -255,6 +261,7 @@ def test_azure_cosmos_db_semantic_cache_inner_product_hnsw() -> None:
|
|||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
ef_search=ef_search,
|
ef_search=ef_search,
|
||||||
score_threshold=score_threshold,
|
score_threshold=score_threshold,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -292,6 +299,7 @@ def test_azure_cosmos_db_semantic_cache_multi_hnsw() -> None:
|
|||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
ef_search=ef_search,
|
ef_search=ef_search,
|
||||||
score_threshold=score_threshold,
|
score_threshold=score_threshold,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -331,6 +339,7 @@ def test_azure_cosmos_db_semantic_cache_multi_inner_product_hnsw() -> None:
|
|||||||
ef_construction=ef_construction,
|
ef_construction=ef_construction,
|
||||||
ef_search=ef_search,
|
ef_search=ef_search,
|
||||||
score_threshold=score_threshold,
|
score_threshold=score_threshold,
|
||||||
|
application_name=application_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user