mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 15:43:54 +00:00
community[patch]: Enhance iMessage chat loader with timestamp parsing and message ownership (#14804)
--------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
e3abe12243
commit
6cc3c2452c
@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Iterator, List, Optional, Union
|
||||
|
||||
@ -12,6 +13,20 @@ if TYPE_CHECKING:
|
||||
import sqlite3
|
||||
|
||||
|
||||
def nanoseconds_from_2001_to_datetime(nanoseconds: int) -> datetime:
    """Convert a nanosecond offset from 2001-01-01 into a ``datetime``.

    The offset is added directly to the naive reference date
    ``datetime(2001, 1, 1)``, so the result is independent of the timezone
    (and daylight-saving rules) of the machine running the code. If the
    offset is measured from midnight UTC, the returned naive datetime is
    the UTC wall-clock time.

    Args:
        nanoseconds: Nanoseconds elapsed since 2001-01-01 00:00:00. Negative
            values yield datetimes before the reference date.

    Returns:
        A naive ``datetime`` with microsecond resolution; sub-microsecond
        digits are discarded.

    Raises:
        OverflowError: If the resulting date falls outside the range
            ``datetime`` can represent.
    """
    # Local import: the surrounding module only imports ``datetime`` itself.
    from datetime import timedelta

    # Integer arithmetic keeps the microsecond component exact; dividing by
    # 1e9 as a float would lose precision on 18-digit nanosecond values.
    microseconds = int(nanoseconds) // 1000

    # Plain timedelta arithmetic avoids ``timestamp()``/``fromtimestamp()``,
    # which interpret naive datetimes in the local timezone and skew the
    # result by the DST offset between January 2001 and the target date.
    return datetime(2001, 1, 1) + timedelta(microseconds=microseconds)
|
||||
|
||||
|
||||
class IMessageChatLoader(BaseChatLoader):
|
||||
"""Load chat sessions from the `iMessage` chat.db SQLite file.
|
||||
|
||||
@ -92,17 +107,17 @@ class IMessageChatLoader(BaseChatLoader):
|
||||
results: List[HumanMessage] = []
|
||||
|
||||
query = """
|
||||
SELECT message.date, handle.id, message.text, message.attributedBody
|
||||
SELECT message.date, handle.id, message.text, message.is_from_me, message.attributedBody
|
||||
FROM message
|
||||
JOIN chat_message_join ON message.ROWID = chat_message_join.message_id
|
||||
JOIN handle ON message.handle_id = handle.ROWID
|
||||
WHERE chat_message_join.chat_id = ?
|
||||
ORDER BY message.date ASC;
|
||||
"""
|
||||
""" # noqa: E501
|
||||
cursor.execute(query, (chat_id,))
|
||||
messages = cursor.fetchall()
|
||||
|
||||
for date, sender, text, attributedBody in messages:
|
||||
for date, sender, text, is_from_me, attributedBody in messages:
|
||||
if text:
|
||||
content = text
|
||||
elif attributedBody:
|
||||
@ -116,7 +131,11 @@ class IMessageChatLoader(BaseChatLoader):
|
||||
content=content,
|
||||
additional_kwargs={
|
||||
"message_time": date,
|
||||
"message_time_as_datetime": nanoseconds_from_2001_to_datetime(
|
||||
date
|
||||
),
|
||||
"sender": sender,
|
||||
"is_from_me": bool(is_from_me),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
@ -1,3 +1,4 @@
|
||||
import datetime
|
||||
import pathlib
|
||||
|
||||
from langchain_community.chat_loaders import imessage, utils
|
||||
@ -14,8 +15,27 @@ def test_imessage_chat_loader() -> None:
|
||||
|
||||
assert chat_sessions[0]["messages"], "Chat messages should not be empty"
|
||||
|
||||
first_message = chat_sessions[0]["messages"][0]
|
||||
|
||||
# message content in text field
|
||||
assert "Yeh" in chat_sessions[0]["messages"][0].content, "Chat content mismatch"
|
||||
assert "Yeh" in first_message.content, "Chat content mismatch"
|
||||
|
||||
# time parsed correctly
|
||||
expected_message_time = 720845450393148160
|
||||
assert (
|
||||
first_message.additional_kwargs["message_time"] == expected_message_time
|
||||
), "unexpected time"
|
||||
|
||||
expected_parsed_time = datetime.datetime(2023, 11, 5, 2, 50, 50, 393148)
|
||||
assert (
|
||||
first_message.additional_kwargs["message_time_as_datetime"]
|
||||
== expected_parsed_time
|
||||
), "date failed to parse"
|
||||
|
||||
# is_from_me parsed correctly
|
||||
assert (
|
||||
first_message.additional_kwargs["is_from_me"] is False
|
||||
), "is_from_me failed to parse"
|
||||
|
||||
# short message content in attributedBody field
|
||||
assert (
|
||||
|
Loading…
Reference in New Issue
Block a user