community[patch]: update dynamodb chat history to update instead of overwrite (#22397)

**Description:**
The current implementation of `DynamoDBChatMessageHistory` updates the
`History` attribute for a given chat history record by first extracting
the existing contents into memory, appending the new message, and then
using the `put_item` method to put the record back. This has the effect
of overwriting any additional attributes someone may want to include in
the record, like chat session metadata.

This PR replaces `put_item` with `update_item`, which leaves any other
attributes in the record untouched.
The change is backward compatible because:
1. `update_item` is an "upsert" operation: it creates the record if it
doesn't already exist and otherwise updates it.
2. It only touches the DB write call and passes exactly the same
information. The rest of the class is left untouched.

**Dependencies:**
None

**Tests and docs:**
No unit tests currently exist for the `DynamoDBChatMessageHistory`
class. This PR adds the file
`libs/community/tests/unit_tests/chat_message_histories/test_dynamodb_chat_message_history.py`
to test the `add_message` and `clear` methods. I wanted to use the moto
library to mock DynamoDB calls, but I could not get poetry to resolve it,
so I mocked those calls myself in the test. Therefore, no test
dependencies were added.

The change was tested on a test DynamoDB table as well. The first three
images below show the current behavior. First a message is added to chat
history, then a value is inserted in the record in some other attribute,
and finally another message is added to the record, destroying the other
attribute.

![using_put_1_first_message](https://github.com/langchain-ai/langchain/assets/29493541/426acd62-fe29-42f4-b75f-863fb8b3fb21)

![using_put_2_add_attribute](https://github.com/langchain-ai/langchain/assets/29493541/f8a1c864-7114-4fe3-b487-d6f9252f8f92)

![using_put_3_second_message](https://github.com/langchain-ai/langchain/assets/29493541/8b691e08-755e-4877-8969-0e9769e5d28a)

The next three images show the new behavior. Once again a value is added
to an attribute other than the History attribute, but now, when the
follow-up message is added, it does not destroy that other attribute. The
History attribute itself is unaffected by this change.

![using_update_1_first_message](https://github.com/langchain-ai/langchain/assets/29493541/3e0d76ed-637e-41cd-82c7-01a86c468634)

![using_update_2_add_attribute](https://github.com/langchain-ai/langchain/assets/29493541/52585f9b-71a2-43f0-9dfc-9935aa59c729)

![using_update_3_second_message](https://github.com/langchain-ai/langchain/assets/29493541/f94c8147-2d6f-407a-9a0f-86b94341abff)

The doc located at `docs/docs/integrations/memory/aws_dynamodb.ipynb`
required no changes and was tested as well.
This commit is contained in:
Davi Schumacher 2024-12-16 08:38:00 -07:00 committed by GitHub
parent 6ddd5dbb1e
commit 0f9b4bf244
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 164 additions and 8 deletions

View File

@ -167,16 +167,19 @@ class DynamoDBChatMessageHistory(BaseChatMessageHistory):
import time
expireAt = int(time.time()) + self.ttl
self.table.put_item(
Item={
**self.key,
self.history_messages_key: messages,
self.ttl_key_name: expireAt,
}
self.table.update_item(
Key={**self.key},
UpdateExpression=(
f"set {self.history_messages_key} = :h, "
f"{self.ttl_key_name} = :t"
),
ExpressionAttributeValues={":h": messages, ":t": expireAt},
)
else:
self.table.put_item(
Item={**self.key, self.history_messages_key: messages}
self.table.update_item(
Key={**self.key},
UpdateExpression=f"set {self.history_messages_key} = :h",
ExpressionAttributeValues={":h": messages},
)
except ClientError as err:
logger.error(err)

View File

@ -0,0 +1,153 @@
from typing import Any
import pytest
from langchain_core.messages import AIMessage, HumanMessage, messages_to_dict
from pytest_mock import MockerFixture
from langchain_community.chat_message_histories.dynamodb import (
DynamoDBChatMessageHistory,
)
# Attribute names handed to DynamoDBChatMessageHistory by the fixture in this
# module: HISTORY_KEY as ``history_messages_key`` and TTL_KEY as ``ttl_key_name``.
HISTORY_KEY = "ChatHistory"
TTL_KEY = "TimeToLive"
def dict_to_key(Key: dict) -> tuple:
    """Turn a DynamoDB key mapping into a hashable, order-independent tuple.

    Args:
        Key: Mapping of key attribute names to their values.

    Returns:
        The ``(name, value)`` pairs of ``Key`` in sorted order, so that two
        mappings with the same content produce the same tuple regardless of
        insertion order.
    """
    pairs = list(Key.items())
    pairs.sort()
    return tuple(pairs)
class MockDynamoDBChatHistoryTable:
    """Contains the table for the mock DynamoDB resource."""

    class Table:
        """In-memory stand-in for the Boto DynamoDB table calls used in the tests.

        Records are kept in ``self.items``, keyed by ``dict_to_key(Key)``, and
        each stored value has the ``{"Item": {...}}`` shape that ``get_item``
        hands back.
        """

        def __init__(self, *args: tuple, **kwargs: dict[str, Any]) -> None:
            """Create an empty table; all constructor arguments are ignored."""
            self.items: dict = {}

        def get_item(self, Key: dict) -> dict:
            """Return the stored ``{"Item": ...}`` record, or ``{}`` if absent."""
            return self.items.get(dict_to_key(Key), {})

        def update_item(
            self, Key: dict, UpdateExpression: str, ExpressionAttributeValues: dict
        ) -> None:
            """Upsert the attributes named in a ``SET`` update expression.

            Only expressions of the form ``set <name> = <:placeholder>[, ...]``
            are understood. Attributes already stored on the record but not
            mentioned in the expression are left untouched, which is the
            behaviour that distinguishes ``update_item`` from ``put_item``.

            Args:
                Key: Key attributes identifying the record.
                UpdateExpression: The ``SET`` expression to apply.
                ExpressionAttributeValues: Values for the ``:placeholder``
                    tokens used in ``UpdateExpression``.

            Raises:
                ValueError: If the expression is not a ``SET`` expression.
                KeyError: If a placeholder has no entry in
                    ``ExpressionAttributeValues``.
            """
            action, _, assignments = UpdateExpression.strip().partition(" ")
            if action.lower() != "set" or not assignments.strip():
                raise ValueError(
                    f"Mock only supports SET update expressions: {UpdateExpression!r}"
                )
            # Resolve every clause before touching the store so a bad
            # placeholder cannot leave a half-applied update behind.
            updates = {}
            for clause in assignments.split(","):
                name, _, placeholder = clause.partition("=")
                updates[name.strip()] = ExpressionAttributeValues[placeholder.strip()]
            record = self.items.setdefault(dict_to_key(Key), {"Item": {}})
            record["Item"].update(updates)

        def delete_item(self, Key: dict) -> None:
            """Remove the record for ``Key``; do nothing if it does not exist."""
            self.items.pop(dict_to_key(Key), None)
class MockBoto3DynamoDBSession:
    """Mock Boto session handed to DynamoDBChatMessageHistory in these tests.

    Its ``resource`` method yields a mock DynamoDB resource so the class
    methods under test can run without a real DynamoDB connection.
    """

    def resource(
        self, *args: tuple, **kwargs: dict[str, Any]
    ) -> MockDynamoDBChatHistoryTable:
        """Return a new mock DynamoDB resource; every argument is ignored."""
        mock_resource = MockDynamoDBChatHistoryTable()
        return mock_resource
@pytest.fixture(scope="module")
def chat_history_config() -> dict:
    """Provide the record key and TTL shared by the tests in this module.

    Returns:
        A dict with ``"key"`` (the key attributes of the chat session record)
        and ``"ttl"`` (the offset added to the current time when the expiry
        attribute is written).
    """
    session_key = {"primaryKey": "foo", "secondaryKey": 123}
    ttl_offset = 600
    return {"key": session_key, "ttl": ttl_offset}
@pytest.fixture(scope="class")
def ddb_chat_history_with_mock_boto_session(
    chat_history_config: dict,
) -> DynamoDBChatMessageHistory:
    """Build a DynamoDBChatMessageHistory wired to the mock Boto session.

    The fixture is class-scoped, so every test in a test class receives the
    same history object.

    Args:
        chat_history_config: Supplies the record ``"key"`` and ``"ttl"``.

    Returns:
        A history instance configured with ``HISTORY_KEY`` and ``TTL_KEY`` as
        its attribute names.
    """
    mock_session = MockBoto3DynamoDBSession()
    record_key = chat_history_config["key"]
    ttl = chat_history_config["ttl"]
    history = DynamoDBChatMessageHistory(
        table_name="test_table",
        session_id="test_session",
        boto3_session=mock_session,
        key=record_key,
        ttl=ttl,
        ttl_key_name=TTL_KEY,
        history_messages_key=HISTORY_KEY,
    )
    return history
class TestDynamoDBChatMessageHistory:
    """Tests for ``add_message`` and ``clear`` against the in-memory mock table.

    Both tests receive the same class-scoped history fixture and therefore
    share one mock table. Each test sets up the records it needs itself so
    that neither depends on the other having run first.
    """

    @pytest.mark.requires("botocore")
    def test_add_message(
        self,
        mocker: MockerFixture,
        ddb_chat_history_with_mock_boto_session: DynamoDBChatMessageHistory,
        chat_history_config: dict,
    ) -> None:
        """Messages are appended to the history and the TTL is refreshed."""
        # For verifying the TTL value
        mock_time_1 = 1234567000
        mock_time_2 = 1234568000

        # Get the history class and mock DynamoDB table
        history: DynamoDBChatMessageHistory = ddb_chat_history_with_mock_boto_session
        history_table: MockDynamoDBChatHistoryTable.Table = history.table
        history_item = history_table.get_item(chat_history_config["key"])
        assert history_item == {}  # Should be empty so far

        # Add the first message
        mocker.patch("time.time", lambda: mock_time_1)
        first_message = HumanMessage(content="new human message")
        history.add_message(message=first_message)
        item_after_human_message = history_table.get_item(chat_history_config["key"])[
            "Item"
        ]
        assert item_after_human_message[HISTORY_KEY] == messages_to_dict(
            [first_message]
        )  # History should only contain the first message
        assert (
            item_after_human_message[TTL_KEY]
            == mock_time_1 + chat_history_config["ttl"]
        )  # TTL should exist

        # Add the second message
        mocker.patch("time.time", lambda: mock_time_2)
        second_message = AIMessage(content="new AI response")
        history.add_message(message=second_message)
        item_after_ai_message = history_table.get_item(chat_history_config["key"])[
            "Item"
        ]
        assert item_after_ai_message[HISTORY_KEY] == messages_to_dict(
            [first_message, second_message]
        )  # Second message should have appended
        assert (
            item_after_ai_message[TTL_KEY] == mock_time_2 + chat_history_config["ttl"]
        )  # TTL should have updated

    @pytest.mark.requires("botocore")
    def test_clear(
        self,
        ddb_chat_history_with_mock_boto_session: DynamoDBChatMessageHistory,
        chat_history_config: dict,
    ) -> None:
        """``clear`` removes only the record for the current key."""
        # Get the history class and mock DynamoDB table
        history: DynamoDBChatMessageHistory = ddb_chat_history_with_mock_boto_session
        history_table: MockDynamoDBChatHistoryTable.Table = history.table
        original_key = chat_history_config["key"]

        # Make sure the original session has a record of its own instead of
        # relying on test_add_message having populated the shared table.
        history.key = original_key
        history.add_message(
            message=HumanMessage(content="human message for original chat session")
        )

        # Use new key so we get a new chat session and add a message to the new session
        new_session_key = {"primaryKey": "bar", "secondaryKey": 456}
        history.key = new_session_key
        history.add_message(
            message=HumanMessage(content="human message for different chat session")
        )

        # Chat history table should now contain both chat sessions
        assert set(history_table.items) == {
            dict_to_key(original_key),
            dict_to_key(new_session_key),
        }

        # Reset the key to the original and use the clear method
        history.key = original_key
        history.clear()

        # Only the original chat session should be removed
        assert set(history_table.items) == {dict_to_key(new_session_key)}