fix(core): strip message IDs from cache keys using model_copy (#33915)
**Description:** Closes #[33883](https://github.com/langchain-ai/langchain/issues/33883)

Chat model cache keys are generated by serializing messages via `dumps(messages)`. The optional `BaseMessage.id` field (a UUID used solely for tracing/threading) is included in this serialization, causing functionally identical messages to produce different cache keys. This results in repeated API calls, cache bloat, and degraded performance in production workloads (e.g., agents, RAG chains, long conversations).

This change normalizes messages **only for cache key generation** by stripping the nonsemantic `id` field using Pydantic V2's `model_copy(update={"id": None})`. The normalization is applied in both synchronous and asynchronous cache paths (`_generate_with_cache` / `_agenerate_with_cache`) immediately before `dumps()`:

```python
normalized_messages = [
    msg.model_copy(update={"id": None}) if getattr(msg, "id", None) is not None else msg
    for msg in messages
]
prompt = dumps(normalized_messages)
```

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
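For illustration, here is a minimal sketch of the failure mode and the fix. This is not code from the PR itself; it assumes `langchain_core` is installed and reuses `dumps` and `HumanMessage` exactly as they appear in the diff below:

```python
from langchain_core.load import dumps
from langchain_core.messages import HumanMessage

# Two messages that differ only in their nonsemantic `id`...
a = HumanMessage(content="How are you?", id="unique-id-1")
b = HumanMessage(content="How are you?", id="unique-id-2")

# ...serialize differently, so before this change they produced
# different cache keys and therefore spurious cache misses.
assert dumps([a]) != dumps([b])


def strip_id(msg: HumanMessage) -> HumanMessage:
    """Drop the nonsemantic id, as the cache paths now do."""
    return msg.model_copy(update={"id": None})


# Stripping `id` via Pydantic V2's model_copy restores key equality.
assert dumps([strip_id(a)]) == dumps([strip_id(b)])
```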
```diff
@@ -1148,7 +1148,15 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
         if check_cache:
             if llm_cache:
                 llm_string = self._get_llm_string(stop=stop, **kwargs)
-                prompt = dumps(messages)
+                normalized_messages = [
+                    (
+                        msg.model_copy(update={"id": None})
+                        if getattr(msg, "id", None) is not None
+                        else msg
+                    )
+                    for msg in messages
+                ]
+                prompt = dumps(normalized_messages)
                 cache_val = llm_cache.lookup(prompt, llm_string)
                 if isinstance(cache_val, list):
                     converted_generations = self._convert_cached_generations(cache_val)
```
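A note on the design choice, with a hedged sketch (my own illustration, not code from this PR): `model_copy` produces normalized copies used only for key generation, so the caller's message list, and the `id`s that tracing/threading depend on, are left untouched; the `getattr` guard also skips the copy entirely for messages that never had an `id`.

```python
from langchain_core.messages import HumanMessage

original = HumanMessage(content="hi", id="trace-123")
normalized = original.model_copy(update={"id": None})

assert normalized.id is None         # copy is used only for the cache key
assert original.id == "trace-123"    # original keeps its id for tracing
```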
The asynchronous path gets the identical normalization, with `alookup` in place of `lookup`:

```diff
@@ -1266,7 +1274,15 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
         if check_cache:
             if llm_cache:
                 llm_string = self._get_llm_string(stop=stop, **kwargs)
-                prompt = dumps(messages)
+                normalized_messages = [
+                    (
+                        msg.model_copy(update={"id": None})
+                        if getattr(msg, "id", None) is not None
+                        else msg
+                    )
+                    for msg in messages
+                ]
+                prompt = dumps(normalized_messages)
                 cache_val = await llm_cache.alookup(prompt, llm_string)
                 if isinstance(cache_val, list):
                     converted_generations = self._convert_cached_generations(cache_val)
```
The test module imports `HumanMessage` for the new tests:

```diff
@@ -13,7 +13,7 @@ from langchain_core.language_models.fake_chat_models import (
     GenericFakeChatModel,
 )
 from langchain_core.load import dumps
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.outputs import ChatGeneration, Generation
 from langchain_core.outputs.chat_result import ChatResult
```
```diff
@@ -475,3 +475,61 @@ def test_token_costs_are_zeroed_out() -> None:
     assert isinstance(second_response, AIMessage)
     assert second_response.usage_metadata
     assert second_response.usage_metadata["total_cost"] == 0  # type: ignore[typeddict-item]
+
+
+def test_cache_key_ignores_message_id_sync() -> None:
+    """Test that message IDs are stripped from cache keys (sync).
+
+    Functionally identical messages with different IDs should produce
+    the same cache key and result in cache hits.
+    """
+    local_cache = InMemoryCache()
+    model = FakeListChatModel(cache=local_cache, responses=["hello", "goodbye"])
+
+    # First call with a message that has an ID
+    msg_with_id_1 = HumanMessage(content="How are you?", id="unique-id-1")
+    result_1 = model.invoke([msg_with_id_1])
+    assert result_1.content == "hello"
+
+    # Second call with the same content but different ID should hit cache
+    msg_with_id_2 = HumanMessage(content="How are you?", id="unique-id-2")
+    result_2 = model.invoke([msg_with_id_2])
+    # Should get cached response, not "goodbye"
+    assert result_2.content == "hello"
+
+    # Third call with no ID should also hit cache
+    msg_no_id = HumanMessage(content="How are you?")
+    result_3 = model.invoke([msg_no_id])
+    assert result_3.content == "hello"
+
+    # Verify only one cache entry exists
+    assert len(local_cache._cache) == 1
+
+
+async def test_cache_key_ignores_message_id_async() -> None:
+    """Test that message IDs are stripped from cache keys (async).
+
+    Functionally identical messages with different IDs should produce
+    the same cache key and result in cache hits.
+    """
+    local_cache = InMemoryCache()
+    model = FakeListChatModel(cache=local_cache, responses=["hello", "goodbye"])
+
+    # First call with a message that has an ID
+    msg_with_id_1 = HumanMessage(content="How are you?", id="unique-id-1")
+    result_1 = await model.ainvoke([msg_with_id_1])
+    assert result_1.content == "hello"
+
+    # Second call with the same content but different ID should hit cache
+    msg_with_id_2 = HumanMessage(content="How are you?", id="unique-id-2")
+    result_2 = await model.ainvoke([msg_with_id_2])
+    # Should get cached response, not "goodbye"
+    assert result_2.content == "hello"
+
+    # Third call with no ID should also hit cache
+    msg_no_id = HumanMessage(content="How are you?")
+    result_3 = await model.ainvoke([msg_no_id])
+    assert result_3.content == "hello"
+
+    # Verify only one cache entry exists
+    assert len(local_cache._cache) == 1
```