mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-29 01:48:57 +00:00
community[patch]: Enhance iMessage chat loader with timestamp parsing and message ownership (#14804)
--------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
e3abe12243
commit
6cc3c2452c
@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Iterator, List, Optional, Union
|
from typing import TYPE_CHECKING, Iterator, List, Optional, Union
|
||||||
|
|
||||||
@ -12,6 +13,20 @@ if TYPE_CHECKING:
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
|
|
||||||
|
def nanoseconds_from_2001_to_datetime(nanoseconds: int) -> datetime:
    """Convert a nanosecond offset from January 1, 2001 into a ``datetime``.

    The result is the naive datetime ``2001-01-01 00:00:00`` plus the elapsed
    time. The computation never consults the machine's local timezone, so the
    same input yields the same output on every host (including hosts whose
    UTC offset changes with daylight saving time).

    NOTE(review): Apple's reference date is defined in UTC, so the returned
    naive value is presumably a UTC wall-clock time -- confirm before
    attaching a tzinfo downstream.

    Args:
        nanoseconds: Nanoseconds elapsed since January 1, 2001.

    Returns:
        A naive ``datetime`` with microsecond resolution; sub-microsecond
        digits are truncated.
    """
    # Local import: the module only imports ``datetime`` from ``datetime``.
    from datetime import timedelta

    reference_date = datetime(2001, 1, 1)

    # Integer division keeps the conversion exact; going through float
    # seconds loses precision for ~18-digit nanosecond counts.
    elapsed = timedelta(microseconds=nanoseconds // 1000)

    return reference_date + elapsed
|
||||||
|
|
||||||
|
|
||||||
class IMessageChatLoader(BaseChatLoader):
|
class IMessageChatLoader(BaseChatLoader):
|
||||||
"""Load chat sessions from the `iMessage` chat.db SQLite file.
|
"""Load chat sessions from the `iMessage` chat.db SQLite file.
|
||||||
|
|
||||||
@ -92,17 +107,17 @@ class IMessageChatLoader(BaseChatLoader):
|
|||||||
results: List[HumanMessage] = []
|
results: List[HumanMessage] = []
|
||||||
|
|
||||||
query = """
|
query = """
|
||||||
SELECT message.date, handle.id, message.text, message.attributedBody
|
SELECT message.date, handle.id, message.text, message.is_from_me, message.attributedBody
|
||||||
FROM message
|
FROM message
|
||||||
JOIN chat_message_join ON message.ROWID = chat_message_join.message_id
|
JOIN chat_message_join ON message.ROWID = chat_message_join.message_id
|
||||||
JOIN handle ON message.handle_id = handle.ROWID
|
JOIN handle ON message.handle_id = handle.ROWID
|
||||||
WHERE chat_message_join.chat_id = ?
|
WHERE chat_message_join.chat_id = ?
|
||||||
ORDER BY message.date ASC;
|
ORDER BY message.date ASC;
|
||||||
"""
|
""" # noqa: E501
|
||||||
cursor.execute(query, (chat_id,))
|
cursor.execute(query, (chat_id,))
|
||||||
messages = cursor.fetchall()
|
messages = cursor.fetchall()
|
||||||
|
|
||||||
for date, sender, text, attributedBody in messages:
|
for date, sender, text, is_from_me, attributedBody in messages:
|
||||||
if text:
|
if text:
|
||||||
content = text
|
content = text
|
||||||
elif attributedBody:
|
elif attributedBody:
|
||||||
@ -116,7 +131,11 @@ class IMessageChatLoader(BaseChatLoader):
|
|||||||
content=content,
|
content=content,
|
||||||
additional_kwargs={
|
additional_kwargs={
|
||||||
"message_time": date,
|
"message_time": date,
|
||||||
|
"message_time_as_datetime": nanoseconds_from_2001_to_datetime(
|
||||||
|
date
|
||||||
|
),
|
||||||
"sender": sender,
|
"sender": sender,
|
||||||
|
"is_from_me": bool(is_from_me),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import datetime
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from langchain_community.chat_loaders import imessage, utils
|
from langchain_community.chat_loaders import imessage, utils
|
||||||
@ -14,8 +15,27 @@ def test_imessage_chat_loader() -> None:
|
|||||||
|
|
||||||
assert chat_sessions[0]["messages"], "Chat messages should not be empty"
|
assert chat_sessions[0]["messages"], "Chat messages should not be empty"
|
||||||
|
|
||||||
|
first_message = chat_sessions[0]["messages"][0]
|
||||||
|
|
||||||
# message content in text field
|
# message content in text field
|
||||||
assert "Yeh" in chat_sessions[0]["messages"][0].content, "Chat content mismatch"
|
assert "Yeh" in first_message.content, "Chat content mismatch"
|
||||||
|
|
||||||
|
# time parsed correctly
|
||||||
|
expected_message_time = 720845450393148160
|
||||||
|
assert (
|
||||||
|
first_message.additional_kwargs["message_time"] == expected_message_time
|
||||||
|
), "unexpected time"
|
||||||
|
|
||||||
|
expected_parsed_time = datetime.datetime(2023, 11, 5, 2, 50, 50, 393148)
|
||||||
|
assert (
|
||||||
|
first_message.additional_kwargs["message_time_as_datetime"]
|
||||||
|
== expected_parsed_time
|
||||||
|
), "date failed to parse"
|
||||||
|
|
||||||
|
# is_from_me parsed correctly
|
||||||
|
assert (
|
||||||
|
first_message.additional_kwargs["is_from_me"] is False
|
||||||
|
), "is_from_me failed to parse"
|
||||||
|
|
||||||
# short message content in attributedBody field
|
# short message content in attributedBody field
|
||||||
assert (
|
assert (
|
||||||
|
Loading…
Reference in New Issue
Block a user