fix(core): strip message IDs from cache keys using model_copy (#33915)

**Description:**  

*Closes [#33883](https://github.com/langchain-ai/langchain/issues/33883)*

Chat model cache keys are generated by serializing messages via
`dumps(messages)`. The optional `BaseMessage.id` field (a UUID used
solely for tracing/threading) is included in this serialization, causing
functionally identical messages to produce different cache keys. This
results in repeated API calls, cache bloat, and degraded performance in
production workloads (e.g., agents, RAG chains, long conversations).
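
A minimal sketch of the failure mode (using the same `dumps` and `HumanMessage` imports exercised by the tests below): two messages that differ only in `id` serialize to different strings, so they can never share a cache entry.

```python
from langchain_core.load import dumps
from langchain_core.messages import HumanMessage

# Same content, different tracing IDs.
msg_a = HumanMessage(content="How are you?", id="unique-id-1")
msg_b = HumanMessage(content="How are you?", id="unique-id-2")

# Before this change, the serialized forms (and therefore the cache keys)
# differ only in the `id` field, so the second call misses the cache.
assert dumps([msg_a]) != dumps([msg_b])
```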

This change normalizes messages **only for cache key generation** by
stripping the nonsemantic `id` field using Pydantic V2’s
`model_copy(update={"id": None})`. The normalization is applied in both
synchronous and asynchronous cache paths (`_generate_with_cache` /
`_agenerate_with_cache`) immediately before `dumps()`.

```python
normalized_messages = [
    msg.model_copy(update={"id": None})
    if getattr(msg, "id", None) is not None
    else msg
    for msg in messages
]
prompt = dumps(normalized_messages)
```
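
Because `model_copy` returns a new message object, the original messages (and their tracing/threading IDs) are left untouched; only the string handed to the cache lookup changes. A quick sketch of that property:

```python
from langchain_core.messages import HumanMessage

msg = HumanMessage(content="How are you?", id="unique-id-1")
normalized = msg.model_copy(update={"id": None})

assert normalized.id is None  # the copy used for the cache key drops the ID
assert msg.id == "unique-id-1"  # the original keeps its tracing ID
assert normalized.content == msg.content  # semantic content is unchanged
```
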
---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
commit 730a3676f8 (parent cd5b36456a), authored by aroun-coumar, committed by GitHub on 2026-01-05 21:07:10 +05:30
2 changed files with 77 additions and 3 deletions


```diff
@@ -1148,7 +1148,15 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
         if check_cache:
             if llm_cache:
                 llm_string = self._get_llm_string(stop=stop, **kwargs)
-                prompt = dumps(messages)
+                normalized_messages = [
+                    (
+                        msg.model_copy(update={"id": None})
+                        if getattr(msg, "id", None) is not None
+                        else msg
+                    )
+                    for msg in messages
+                ]
+                prompt = dumps(normalized_messages)
                 cache_val = llm_cache.lookup(prompt, llm_string)
                 if isinstance(cache_val, list):
                     converted_generations = self._convert_cached_generations(cache_val)
@@ -1266,7 +1274,15 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
         if check_cache:
             if llm_cache:
                 llm_string = self._get_llm_string(stop=stop, **kwargs)
-                prompt = dumps(messages)
+                normalized_messages = [
+                    (
+                        msg.model_copy(update={"id": None})
+                        if getattr(msg, "id", None) is not None
+                        else msg
+                    )
+                    for msg in messages
+                ]
+                prompt = dumps(normalized_messages)
                 cache_val = await llm_cache.alookup(prompt, llm_string)
                 if isinstance(cache_val, list):
                     converted_generations = self._convert_cached_generations(cache_val)
```


```diff
@@ -13,7 +13,7 @@ from langchain_core.language_models.fake_chat_models import (
     GenericFakeChatModel,
 )
 from langchain_core.load import dumps
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.outputs import ChatGeneration, Generation
 from langchain_core.outputs.chat_result import ChatResult
@@ -475,3 +475,61 @@ def test_token_costs_are_zeroed_out() -> None:
     assert isinstance(second_response, AIMessage)
     assert second_response.usage_metadata
     assert second_response.usage_metadata["total_cost"] == 0  # type: ignore[typeddict-item]
+
+
+def test_cache_key_ignores_message_id_sync() -> None:
+    """Test that message IDs are stripped from cache keys (sync).
+
+    Functionally identical messages with different IDs should produce
+    the same cache key and result in cache hits.
+    """
+    local_cache = InMemoryCache()
+    model = FakeListChatModel(cache=local_cache, responses=["hello", "goodbye"])
+
+    # First call with a message that has an ID
+    msg_with_id_1 = HumanMessage(content="How are you?", id="unique-id-1")
+    result_1 = model.invoke([msg_with_id_1])
+    assert result_1.content == "hello"
+
+    # Second call with the same content but different ID should hit cache
+    msg_with_id_2 = HumanMessage(content="How are you?", id="unique-id-2")
+    result_2 = model.invoke([msg_with_id_2])
+    # Should get cached response, not "goodbye"
+    assert result_2.content == "hello"
+
+    # Third call with no ID should also hit cache
+    msg_no_id = HumanMessage(content="How are you?")
+    result_3 = model.invoke([msg_no_id])
+    assert result_3.content == "hello"
+
+    # Verify only one cache entry exists
+    assert len(local_cache._cache) == 1
+
+
+async def test_cache_key_ignores_message_id_async() -> None:
+    """Test that message IDs are stripped from cache keys (async).
+
+    Functionally identical messages with different IDs should produce
+    the same cache key and result in cache hits.
+    """
+    local_cache = InMemoryCache()
+    model = FakeListChatModel(cache=local_cache, responses=["hello", "goodbye"])
+
+    # First call with a message that has an ID
+    msg_with_id_1 = HumanMessage(content="How are you?", id="unique-id-1")
+    result_1 = await model.ainvoke([msg_with_id_1])
+    assert result_1.content == "hello"
+
+    # Second call with the same content but different ID should hit cache
+    msg_with_id_2 = HumanMessage(content="How are you?", id="unique-id-2")
+    result_2 = await model.ainvoke([msg_with_id_2])
+    # Should get cached response, not "goodbye"
+    assert result_2.content == "hello"
+
+    # Third call with no ID should also hit cache
+    msg_no_id = HumanMessage(content="How are you?")
+    result_3 = await model.ainvoke([msg_no_id])
+    assert result_3.content == "hello"
+
+    # Verify only one cache entry exists
+    assert len(local_cache._cache) == 1
```