mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-17 18:23:59 +00:00
communty[minor]: Store Message History to TiDB Database (#16304)
This pull request integrates the TiDB database into LangChain for storing message history, marking one of several steps towards a comprehensive integration of TiDB with LangChain. A simple usage ```python from datetime import datetime from langchain_community.chat_message_histories import TiDBChatMessageHistory history = TiDBChatMessageHistory( connection_string="mysql+pymysql://<host>:<PASSWORD>@<host>:4000/<db>?ssl_ca=/etc/ssl/cert.pem&ssl_verify_cert=true&ssl_verify_identity=true", session_id="code_gen", earliest_time=datetime.utcnow(), # Optional to set earliest_time to load messages after this time point. ) history.add_user_message("hi! How's feature going?") history.add_ai_message("It's almot done") ```
This commit is contained in:
parent
35ec0bbd3b
commit
b9f5104e6c
@ -0,0 +1,77 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TiDB\n",
|
||||
"\n",
|
||||
"> [TiDB](https://github.com/pingcap/tidb) is an open-source, cloud-native, distributed, MySQL-Compatible database for elastic scale and real-time analytics.\n",
|
||||
"\n",
|
||||
"This notebook introduces how to use TiDB to store chat message history. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"from langchain_community.chat_message_histories import TiDBChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = TiDBChatMessageHistory(\n",
|
||||
" connection_string=\"mysql+pymysql://<host>:<PASSWORD>@<host>:4000/<db>?ssl_ca=/etc/ssl/cert.pem&ssl_verify_cert=true&ssl_verify_identity=true\",\n",
|
||||
" session_id=\"code_gen\",\n",
|
||||
" earliest_time=datetime.utcnow(), # Optional to set earliest_time to load messages after this time point.\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"history.add_user_message(\"hi! How's feature going?\")\n",
|
||||
"history.add_ai_message(\"It's almot done\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content=\"hi! How's feature going?\"),\n",
|
||||
" AIMessage(content=\"It's almot done\")]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"history.messages"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -35,6 +35,7 @@ from langchain_community.chat_message_histories.sql import SQLChatMessageHistory
|
||||
from langchain_community.chat_message_histories.streamlit import (
|
||||
StreamlitChatMessageHistory,
|
||||
)
|
||||
from langchain_community.chat_message_histories.tidb import TiDBChatMessageHistory
|
||||
from langchain_community.chat_message_histories.upstash_redis import (
|
||||
UpstashRedisChatMessageHistory,
|
||||
)
|
||||
@ -62,4 +63,5 @@ __all__ = [
|
||||
"ZepChatMessageHistory",
|
||||
"UpstashRedisChatMessageHistory",
|
||||
"Neo4jChatMessageHistory",
|
||||
"TiDBChatMessageHistory",
|
||||
]
|
||||
|
@ -0,0 +1,148 @@
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain_core.chat_history import BaseChatMessageHistory
|
||||
from langchain_core.messages import BaseMessage, message_to_dict, messages_from_dict
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TiDBChatMessageHistory(BaseChatMessageHistory):
|
||||
"""
|
||||
Represents a chat message history stored in a TiDB database.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
session_id: str,
|
||||
connection_string: str,
|
||||
table_name: str = "langchain_message_store",
|
||||
earliest_time: Optional[datetime] = None,
|
||||
):
|
||||
"""
|
||||
Initializes a new instance of the TiDBChatMessageHistory class.
|
||||
|
||||
Args:
|
||||
session_id (str): The ID of the chat session.
|
||||
connection_string (str): The connection string for the TiDB database.
|
||||
format: mysql+pymysql://<host>:<PASSWORD>@<host>:4000/<db>?ssl_ca=/etc/ssl/cert.pem&ssl_verify_cert=true&ssl_verify_identity=true
|
||||
table_name (str, optional): the table name to store the chat messages.
|
||||
Defaults to "langchain_message_store".
|
||||
earliest_time (Optional[datetime], optional): The earliest time to retrieve messages from.
|
||||
Defaults to None.
|
||||
""" # noqa
|
||||
|
||||
self.session_id = session_id
|
||||
self.table_name = table_name
|
||||
self.earliest_time = earliest_time
|
||||
self.cache = []
|
||||
|
||||
# Set up SQLAlchemy engine and session
|
||||
self.engine = create_engine(connection_string)
|
||||
Session = sessionmaker(bind=self.engine)
|
||||
self.session = Session()
|
||||
|
||||
self._create_table_if_not_exists()
|
||||
self._load_messages_to_cache()
|
||||
|
||||
def _create_table_if_not_exists(self) -> None:
|
||||
"""
|
||||
Creates a table if it does not already exist in the database.
|
||||
"""
|
||||
|
||||
create_table_query = text(
|
||||
f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.table_name} (
|
||||
id INT AUTO_INCREMENT PRIMARY KEY,
|
||||
session_id VARCHAR(255) NOT NULL,
|
||||
message JSON NOT NULL,
|
||||
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
INDEX session_idx (session_id)
|
||||
);"""
|
||||
)
|
||||
try:
|
||||
self.session.execute(create_table_query)
|
||||
self.session.commit()
|
||||
except SQLAlchemyError as e:
|
||||
logger.error(f"Error creating table: {e}")
|
||||
self.session.rollback()
|
||||
|
||||
def _load_messages_to_cache(self) -> None:
|
||||
"""
|
||||
Loads messages from the database into the cache.
|
||||
|
||||
This method retrieves messages from the database table. The retrieved messages
|
||||
are then stored in the cache for faster access.
|
||||
|
||||
Raises:
|
||||
SQLAlchemyError: If there is an error executing the database query.
|
||||
|
||||
"""
|
||||
time_condition = (
|
||||
f"AND create_time >= '{self.earliest_time}'" if self.earliest_time else ""
|
||||
)
|
||||
query = text(
|
||||
f"""
|
||||
SELECT message FROM {self.table_name}
|
||||
WHERE session_id = :session_id {time_condition}
|
||||
ORDER BY id;
|
||||
"""
|
||||
)
|
||||
try:
|
||||
result = self.session.execute(query, {"session_id": self.session_id})
|
||||
for record in result.fetchall():
|
||||
message_dict = json.loads(record[0])
|
||||
self.cache.append(messages_from_dict([message_dict])[0])
|
||||
except SQLAlchemyError as e:
|
||||
logger.error(f"Error loading messages to cache: {e}")
|
||||
|
||||
@property
|
||||
def messages(self) -> List[BaseMessage]:
|
||||
"""returns all messages"""
|
||||
if len(self.cache) == 0:
|
||||
self.reload_cache()
|
||||
return self.cache
|
||||
|
||||
def add_message(self, message: BaseMessage) -> None:
|
||||
"""adds a message to the database and cache"""
|
||||
query = text(
|
||||
f"INSERT INTO {self.table_name} (session_id, message) VALUES (:session_id, :message);" # noqa
|
||||
)
|
||||
try:
|
||||
self.session.execute(
|
||||
query,
|
||||
{
|
||||
"session_id": self.session_id,
|
||||
"message": json.dumps(message_to_dict(message)),
|
||||
},
|
||||
)
|
||||
self.session.commit()
|
||||
self.cache.append(message)
|
||||
except SQLAlchemyError as e:
|
||||
logger.error(f"Error adding message: {e}")
|
||||
self.session.rollback()
|
||||
|
||||
def clear(self) -> None:
|
||||
"""clears all messages"""
|
||||
query = text(f"DELETE FROM {self.table_name} WHERE session_id = :session_id;")
|
||||
try:
|
||||
self.session.execute(query, {"session_id": self.session_id})
|
||||
self.session.commit()
|
||||
self.cache.clear()
|
||||
except SQLAlchemyError as e:
|
||||
logger.error(f"Error clearing messages: {e}")
|
||||
self.session.rollback()
|
||||
|
||||
def reload_cache(self) -> None:
|
||||
"""reloads messages from database to cache"""
|
||||
self.cache.clear()
|
||||
self._load_messages_to_cache()
|
||||
|
||||
def __del__(self) -> None:
|
||||
"""closes the session"""
|
||||
self.session.close()
|
@ -0,0 +1,101 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from langchain_core.messages import AIMessage, HumanMessage
|
||||
|
||||
from langchain_community.chat_message_histories import TiDBChatMessageHistory
|
||||
|
||||
try:
|
||||
CONNECTION_STRING = os.getenv("TEST_TiDB_CHAT_URL", "")
|
||||
|
||||
if CONNECTION_STRING == "":
|
||||
raise OSError("TEST_TiDB_URL environment variable is not set")
|
||||
|
||||
tidb_available = True
|
||||
except (OSError, ImportError):
|
||||
tidb_available = False
|
||||
|
||||
|
||||
@pytest.mark.skipif(not tidb_available, reason="tidb is not available")
|
||||
def test_add_messages() -> None:
|
||||
"""Basic testing: adding messages to the TiDBChatMessageHistory."""
|
||||
message_store = TiDBChatMessageHistory("23334", CONNECTION_STRING)
|
||||
message_store.clear()
|
||||
assert len(message_store.messages) == 0
|
||||
message_store.add_user_message("Hello! Language Chain!")
|
||||
message_store.add_ai_message("Hi Guys!")
|
||||
|
||||
# create another message store to check if the messages are stored correctly
|
||||
message_store_another = TiDBChatMessageHistory("46666", CONNECTION_STRING)
|
||||
message_store_another.clear()
|
||||
assert len(message_store_another.messages) == 0
|
||||
message_store_another.add_user_message("Hello! Bot!")
|
||||
message_store_another.add_ai_message("Hi there!")
|
||||
message_store_another.add_user_message("How's this pr going?")
|
||||
|
||||
# Now check if the messages are stored in the database correctly
|
||||
assert len(message_store.messages) == 2
|
||||
assert isinstance(message_store.messages[0], HumanMessage)
|
||||
assert isinstance(message_store.messages[1], AIMessage)
|
||||
assert message_store.messages[0].content == "Hello! Language Chain!"
|
||||
assert message_store.messages[1].content == "Hi Guys!"
|
||||
|
||||
assert len(message_store_another.messages) == 3
|
||||
assert isinstance(message_store_another.messages[0], HumanMessage)
|
||||
assert isinstance(message_store_another.messages[1], AIMessage)
|
||||
assert isinstance(message_store_another.messages[2], HumanMessage)
|
||||
assert message_store_another.messages[0].content == "Hello! Bot!"
|
||||
assert message_store_another.messages[1].content == "Hi there!"
|
||||
assert message_store_another.messages[2].content == "How's this pr going?"
|
||||
|
||||
# Now clear the first history
|
||||
message_store.clear()
|
||||
assert len(message_store.messages) == 0
|
||||
assert len(message_store_another.messages) == 3
|
||||
message_store_another.clear()
|
||||
assert len(message_store.messages) == 0
|
||||
assert len(message_store_another.messages) == 0
|
||||
|
||||
|
||||
def test_tidb_recent_chat_message():
|
||||
"""Test the TiDBChatMessageHistory with earliest_time parameter."""
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
# prepare some messages
|
||||
message_store = TiDBChatMessageHistory("2333", CONNECTION_STRING)
|
||||
message_store.clear()
|
||||
assert len(message_store.messages) == 0
|
||||
message_store.add_user_message("Hello! Language Chain!")
|
||||
message_store.add_ai_message("Hi Guys!")
|
||||
assert len(message_store.messages) == 2
|
||||
assert isinstance(message_store.messages[0], HumanMessage)
|
||||
assert isinstance(message_store.messages[1], AIMessage)
|
||||
assert message_store.messages[0].content == "Hello! Language Chain!"
|
||||
assert message_store.messages[1].content == "Hi Guys!"
|
||||
|
||||
# now we add some recent messages to the database
|
||||
earliest_time = datetime.utcnow()
|
||||
time.sleep(1)
|
||||
|
||||
message_store.add_user_message("How's this pr going?")
|
||||
message_store.add_ai_message("It's almost done!")
|
||||
assert len(message_store.messages) == 4
|
||||
assert isinstance(message_store.messages[2], HumanMessage)
|
||||
assert isinstance(message_store.messages[3], AIMessage)
|
||||
assert message_store.messages[2].content == "How's this pr going?"
|
||||
assert message_store.messages[3].content == "It's almost done!"
|
||||
|
||||
# now we create another message store with earliest_time parameter
|
||||
message_store_another = TiDBChatMessageHistory(
|
||||
"2333", CONNECTION_STRING, earliest_time=earliest_time
|
||||
)
|
||||
assert len(message_store_another.messages) == 2
|
||||
assert isinstance(message_store_another.messages[0], HumanMessage)
|
||||
assert isinstance(message_store_another.messages[1], AIMessage)
|
||||
assert message_store_another.messages[0].content == "How's this pr going?"
|
||||
assert message_store_another.messages[1].content == "It's almost done!"
|
||||
|
||||
# now we clear the message store
|
||||
message_store.clear()
|
||||
assert len(message_store.messages) == 0
|
Loading…
Reference in New Issue
Block a user