From 1639ccfd1529d81da94fa98bc1cb447f08f50bd0 Mon Sep 17 00:00:00 2001 From: Nithish Raghunandanan <12782505+nithishr@users.noreply.github.com> Date: Tue, 23 Jul 2024 01:30:29 +0200 Subject: [PATCH] couchbase: [patch] Return chat message history in order (#24498) **Description:** Fixes an issue where the chat message history was not returned in order. Each message is now stored with a timestamp, and the history is returned sorted by that timestamp. - [x] **Add tests and docs**: Updated the tests to check the order 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ --------- Co-authored-by: Nithish Raghunandanan Co-authored-by: Erick Friis --- .../chat_message_histories.py | 17 +++++++++++++++-- libs/partners/couchbase/pyproject.toml | 2 +- .../test_chat_message_history.py | 17 +++++++++++++---- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/libs/partners/couchbase/langchain_couchbase/chat_message_histories.py b/libs/partners/couchbase/langchain_couchbase/chat_message_histories.py index ba61e3376f9..110763f645a 100644 --- a/libs/partners/couchbase/langchain_couchbase/chat_message_histories.py +++ b/libs/partners/couchbase/langchain_couchbase/chat_message_histories.py @@ -1,4 +1,7 @@ +"""Couchbase Chat Message History""" + import logging +import time import uuid from typing import Any, Dict, List, Sequence @@ -14,6 +17,7 @@ logger = logging.getLogger(__name__) DEFAULT_SESSION_ID_KEY = "session_id" DEFAULT_MESSAGE_KEY = "message" +DEFAULT_TS_KEY = "ts" DEFAULT_INDEX_NAME = "LANGCHAIN_CHAT_HISTORY" DEFAULT_BATCH_SIZE = 100 @@ -128,10 +132,13 @@ class CouchbaseChatMessageHistory(BaseChatMessageHistory): self._message_key = message_key self._create_index = create_index 
self._session_id = session_id + self._ts_key = DEFAULT_TS_KEY # Create an index if it does not exist if requested if create_index: - index_fields = f"({self._session_id_key}, {self._message_key})" + index_fields = ( + f"({self._session_id_key}, {self._ts_key}, {self._message_key})" + ) index_creation_query = ( f"CREATE INDEX {DEFAULT_INDEX_NAME} IF NOT EXISTS ON " + f"{self._collection_name}{index_fields} " @@ -146,6 +153,8 @@ class CouchbaseChatMessageHistory(BaseChatMessageHistory): """Add a message to the cache""" # Generate a UUID for the document key document_key = uuid.uuid4().hex + # get utc timestamp for ordering the messages + timestamp = time.time() message_content = message_to_dict(message) try: self._collection.insert( @@ -153,6 +162,7 @@ class CouchbaseChatMessageHistory(BaseChatMessageHistory): value={ self._message_key: message_content, self._session_id_key: self._session_id, + self._ts_key: timestamp, }, ) except Exception as e: @@ -164,12 +174,14 @@ class CouchbaseChatMessageHistory(BaseChatMessageHistory): messages_to_insert = [] for message in messages: document_key = uuid.uuid4().hex + timestamp = time.time() message_content = message_to_dict(message) messages_to_insert.append( { document_key: { self._message_key: message_content, self._session_id_key: self._session_id, + self._ts_key: timestamp, }, } ) @@ -189,7 +201,7 @@ class CouchbaseChatMessageHistory(BaseChatMessageHistory): # Delete all documents in the collection with the session_id clear_query = ( f"DELETE FROM `{self._collection_name}`" - + f"where {self._session_id_key}=$session_id" + + f"WHERE {self._session_id_key}=$session_id" ) try: self._scope.query(clear_query, session_id=self._session_id).execute() @@ -202,6 +214,7 @@ class CouchbaseChatMessageHistory(BaseChatMessageHistory): fetch_query = ( f"SELECT {self._message_key} FROM `{self._collection_name}` " + f"where {self._session_id_key}=$session_id" + + f" ORDER BY {self._ts_key} ASC" ) message_items = [] diff --git 
a/libs/partners/couchbase/pyproject.toml b/libs/partners/couchbase/pyproject.toml index 5be4d771588..36359fbefd2 100644 --- a/libs/partners/couchbase/pyproject.toml +++ b/libs/partners/couchbase/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "langchain-couchbase" -version = "0.1.0" +version = "0.1.1" description = "An integration package connecting Couchbase and LangChain" authors = [] readme = "README.md" diff --git a/libs/partners/couchbase/tests/integration_tests/test_chat_message_history.py b/libs/partners/couchbase/tests/integration_tests/test_chat_message_history.py index eacfb4d9ce8..aaee1ec2199 100644 --- a/libs/partners/couchbase/tests/integration_tests/test_chat_message_history.py +++ b/libs/partners/couchbase/tests/integration_tests/test_chat_message_history.py @@ -95,8 +95,9 @@ class TestCouchbaseCache: # check that the messages are in the memory messages = memory.chat_memory.messages assert len(messages) == 2 - for message in messages: - assert message in [ai_message, user_message] + + # check that the messages are in the order of creation + assert messages == [ai_message, user_message] # clear the memory memory.chat_memory.clear() @@ -147,9 +148,17 @@ class TestCouchbaseCache: messages_b = memory_b.chat_memory.messages assert len(messages_a) == 1 assert len(messages_b) == 1 - assert messages_a[0] == ai_message - assert messages_b[0] == user_message + assert messages_a == [ai_message] + assert messages_b == [user_message] # clear the memory memory_a.chat_memory.clear() + time.sleep(SLEEP_DURATION) + # ensure that only the session that is cleared is empty + assert memory_a.chat_memory.messages == [] + assert memory_b.chat_memory.messages == [user_message] + + # clear the other session's memory memory_b.chat_memory.clear() + time.sleep(SLEEP_DURATION) + assert memory_b.chat_memory.messages == []