community[patch]: Enhance iMessage chat loader with timestamp parsing and message ownership (#14804)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Igor Dvorkin 2023-12-19 08:09:01 -08:00 committed by GitHub
parent e3abe12243
commit 6cc3c2452c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 4 deletions

View File

@ -1,5 +1,6 @@
from __future__ import annotations
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, Iterator, List, Optional, Union
@ -12,6 +13,20 @@ if TYPE_CHECKING:
import sqlite3
def nanoseconds_from_2001_to_datetime(nanoseconds: int) -> datetime:
    """Convert an iMessage timestamp to a naive ``datetime``.

    The ``message.date`` column stores nanoseconds elapsed since the Apple
    reference date, 2001-01-01 00:00:00. The conversion is done with exact
    integer/``timedelta`` arithmetic, so the result does not depend on the
    time zone (or DST rules) of the machine running the loader and does not
    lose precision by round-tripping a ~1e18 value through a float.

    Args:
        nanoseconds: Nanoseconds since 2001-01-01 00:00:00.

    Returns:
        A naive ``datetime`` equal to the reference date plus the elapsed
        time, truncated to microsecond resolution (the finest ``datetime``
        supports).
    """
    # Imported locally so this function is self-contained regardless of what
    # the module-level ``from datetime import ...`` line brings in.
    from datetime import timedelta

    # NOTE: the previous implementation used ``datetime(2001, 1, 1).timestamp()``
    # followed by ``datetime.fromtimestamp(...)``. Both interpret naive values
    # in the *local* zone, so the UTC offsets only cancelled when the zone had
    # the same offset on 2001-01-01 and on the message date; in DST-observing
    # zones summer messages came out one hour off.
    reference_date = datetime(2001, 1, 1)

    # datetime has microsecond resolution: 1 microsecond = 1_000 nanoseconds.
    elapsed = timedelta(microseconds=nanoseconds // 1_000)

    return reference_date + elapsed
class IMessageChatLoader(BaseChatLoader):
"""Load chat sessions from the `iMessage` chat.db SQLite file.
@ -92,17 +107,17 @@ class IMessageChatLoader(BaseChatLoader):
results: List[HumanMessage] = []
query = """
SELECT message.date, handle.id, message.text, message.attributedBody
SELECT message.date, handle.id, message.text, message.is_from_me, message.attributedBody
FROM message
JOIN chat_message_join ON message.ROWID = chat_message_join.message_id
JOIN handle ON message.handle_id = handle.ROWID
WHERE chat_message_join.chat_id = ?
ORDER BY message.date ASC;
"""
""" # noqa: E501
cursor.execute(query, (chat_id,))
messages = cursor.fetchall()
for date, sender, text, attributedBody in messages:
for date, sender, text, is_from_me, attributedBody in messages:
if text:
content = text
elif attributedBody:
@ -116,7 +131,11 @@ class IMessageChatLoader(BaseChatLoader):
content=content,
additional_kwargs={
"message_time": date,
"message_time_as_datetime": nanoseconds_from_2001_to_datetime(
date
),
"sender": sender,
"is_from_me": bool(is_from_me),
},
)
)

View File

@ -1,3 +1,4 @@
import datetime
import pathlib
from langchain_community.chat_loaders import imessage, utils
@ -14,8 +15,27 @@ def test_imessage_chat_loader() -> None:
assert chat_sessions[0]["messages"], "Chat messages should not be empty"
first_message = chat_sessions[0]["messages"][0]
# message content in text field
assert "Yeh" in chat_sessions[0]["messages"][0].content, "Chat content mismatch"
assert "Yeh" in first_message.content, "Chat content mismatch"
# time parsed correctly
expected_message_time = 720845450393148160
assert (
first_message.additional_kwargs["message_time"] == expected_message_time
), "unexpected time"
expected_parsed_time = datetime.datetime(2023, 11, 5, 2, 50, 50, 393148)
assert (
first_message.additional_kwargs["message_time_as_datetime"]
== expected_parsed_time
), "date failed to parse"
# is_from_me parsed correctly
assert (
first_message.additional_kwargs["is_from_me"] is False
), "is_from_me failed to parse"
# short message content in attributedBody field
assert (