Restore loading of self-sent messages from before macOS 12 Monterey (#14818)

<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes (if applicable),
  - **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in the `docs/extras`
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
This commit is contained in:
Igor Dvorkin 2024-01-01 16:04:14 -08:00 committed by GitHub
parent d006be60ec
commit 76923e5743
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 77 additions and 10 deletions

View File

@ -91,8 +91,38 @@ class IMessageChatLoader(BaseChatLoader):
length, start = int.from_bytes(content[1:3], "little"), 3
return content[start : start + length].decode("utf-8", errors="ignore")
def _get_session_query(self, use_chat_handle_table: bool) -> str:
# Messages sent pre OSX 12 require a join through the chat_handle_join table
# However, the table doesn't exist if database created with OSX 12 or above.
joins_w_chat_handle = """
JOIN chat_handle_join ON
chat_message_join.chat_id = chat_handle_join.chat_id
JOIN handle ON
handle.ROWID = chat_handle_join.handle_id"""
joins_no_chat_handle = """
JOIN handle ON message.handle_id = handle.ROWID
"""
joins = joins_w_chat_handle if use_chat_handle_table else joins_no_chat_handle
return f"""
SELECT message.date,
handle.id,
message.text,
message.is_from_me,
message.attributedBody
FROM message
JOIN chat_message_join ON
message.ROWID = chat_message_join.message_id
{joins}
WHERE chat_message_join.chat_id = ?
ORDER BY message.date ASC;
"""
def _load_single_chat_session(
self, cursor: "sqlite3.Cursor", chat_id: int
self, cursor: "sqlite3.Cursor", use_chat_handle_table: bool, chat_id: int
) -> ChatSession:
"""
Load a single chat session from the iMessage chat.db.
@ -106,14 +136,7 @@ class IMessageChatLoader(BaseChatLoader):
"""
results: List[HumanMessage] = []
query = """
SELECT message.date, handle.id, message.text, message.is_from_me, message.attributedBody
FROM message
JOIN chat_message_join ON message.ROWID = chat_message_join.message_id
JOIN handle ON message.handle_id = handle.ROWID
WHERE chat_message_join.chat_id = ?
ORDER BY message.date ASC;
""" # noqa: E501
query = self._get_session_query(use_chat_handle_table)
cursor.execute(query, (chat_id,))
messages = cursor.fetchall()
@ -165,6 +188,13 @@ class IMessageChatLoader(BaseChatLoader):
) from e
cursor = conn.cursor()
# See if chat_handle_join table exists:
query = """SELECT name FROM sqlite_master
WHERE type='table' AND name='chat_handle_join';"""
cursor.execute(query)
is_chat_handle_join_exists = cursor.fetchone()
# Fetch the list of chat IDs sorted by time (most recent first)
query = """SELECT chat_id
FROM message
@ -175,6 +205,8 @@ class IMessageChatLoader(BaseChatLoader):
chat_ids = [row[0] for row in cursor.fetchall()]
for chat_id in chat_ids:
yield self._load_single_chat_session(cursor, chat_id)
yield self._load_single_chat_session(
cursor, is_chat_handle_join_exists, chat_id
)
conn.close()

View File

@ -4,6 +4,41 @@ import pathlib
from langchain_community.chat_loaders import imessage, utils
def test_imessage_chat_loader_upgrade_osx11() -> None:
    """Load the ``imessage_chat_upgrade_osx_11.db`` fixture and check the
    first message's content, timestamps and ``is_from_me`` flag."""
    data_dir = pathlib.Path(__file__).parent / "data"
    db_file = data_dir / "imessage_chat_upgrade_osx_11.db"
    loader = imessage.IMessageChatLoader(str(db_file))

    sessions = list(
        utils.map_ai_messages(loader.lazy_load(), sender="testemail@gmail.com")
    )
    assert sessions, "Chat sessions should not be empty"
    assert sessions[0]["messages"], "Chat messages should not be empty"

    first = sessions[0]["messages"][0]

    # The message body must come through in the content field.
    assert "Yeh" in first.content, "Chat content mismatch"

    # The raw timestamp is passed through unchanged.
    raw_time = 720845450393148160
    assert first.additional_kwargs["message_time"] == raw_time, "unexpected time"

    # The raw timestamp is also exposed as a parsed datetime.
    parsed_time = datetime.datetime(2023, 11, 5, 2, 50, 50, 393148)
    assert (
        first.additional_kwargs["message_time_as_datetime"] == parsed_time
    ), "date failed to parse"

    # The is_from_me flag must be a real False, not merely falsy.
    assert (
        first.additional_kwargs["is_from_me"] is False
    ), "is_from_me failed to parse"
def test_imessage_chat_loader() -> None:
chat_path = pathlib.Path(__file__).parent / "data" / "imessage_chat.db"
loader = imessage.IMessageChatLoader(str(chat_path))