def nanoseconds_from_2001_to_datetime(nanoseconds: int) -> datetime:
    """Convert a nanosecond offset from January 1, 2001 into a ``datetime``.

    The offset is added to the fixed reference ``datetime(2001, 1, 1)`` using
    integer ``timedelta`` arithmetic. Unlike a round trip through
    ``datetime.timestamp()`` / ``datetime.fromtimestamp()``, this never
    consults the host's local time zone, so the result is identical on every
    machine and is not shifted by an hour when the reference date and the
    target date fall on different sides of a DST change.

    Args:
        nanoseconds: Nanoseconds elapsed since 2001-01-01 00:00:00
            (presumably UTC, as stored in the iMessage ``message.date``
            column -- confirm against the database schema).

    Returns:
        A naive ``datetime`` on the same clock as the reference date,
        truncated to microsecond resolution.
    """
    # Imported locally so the block does not depend on a module-level import.
    from datetime import timedelta

    # ``datetime`` cannot represent anything finer than a microsecond; integer
    # floor division drops the extra digits without any float rounding error.
    whole_microseconds = nanoseconds // 1000

    return datetime(2001, 1, 1) + timedelta(microseconds=whole_microseconds)
@@ -92,17 +107,17 @@ class IMessageChatLoader(BaseChatLoader): results: List[HumanMessage] = [] query = """ - SELECT message.date, handle.id, message.text, message.attributedBody + SELECT message.date, handle.id, message.text, message.is_from_me, message.attributedBody FROM message JOIN chat_message_join ON message.ROWID = chat_message_join.message_id JOIN handle ON message.handle_id = handle.ROWID WHERE chat_message_join.chat_id = ? ORDER BY message.date ASC; - """ + """ # noqa: E501 cursor.execute(query, (chat_id,)) messages = cursor.fetchall() - for date, sender, text, attributedBody in messages: + for date, sender, text, is_from_me, attributedBody in messages: if text: content = text elif attributedBody: @@ -116,7 +131,11 @@ class IMessageChatLoader(BaseChatLoader): content=content, additional_kwargs={ "message_time": date, + "message_time_as_datetime": nanoseconds_from_2001_to_datetime( + date + ), "sender": sender, + "is_from_me": bool(is_from_me), }, ) ) diff --git a/libs/community/tests/unit_tests/chat_loaders/test_imessage.py b/libs/community/tests/unit_tests/chat_loaders/test_imessage.py index 01b8aad26d8..26ea6303104 100644 --- a/libs/community/tests/unit_tests/chat_loaders/test_imessage.py +++ b/libs/community/tests/unit_tests/chat_loaders/test_imessage.py @@ -1,3 +1,4 @@ +import datetime import pathlib from langchain_community.chat_loaders import imessage, utils @@ -14,8 +15,27 @@ def test_imessage_chat_loader() -> None: assert chat_sessions[0]["messages"], "Chat messages should not be empty" + first_message = chat_sessions[0]["messages"][0] + # message content in text field - assert "Yeh" in chat_sessions[0]["messages"][0].content, "Chat content mismatch" + assert "Yeh" in first_message.content, "Chat content mismatch" + + # time parsed correctly + expected_message_time = 720845450393148160 + assert ( + first_message.additional_kwargs["message_time"] == expected_message_time + ), "unexpected time" + + expected_parsed_time = datetime.datetime(2023, 11, 
5, 2, 50, 50, 393148) + assert ( + first_message.additional_kwargs["message_time_as_datetime"] + == expected_parsed_time + ), "date failed to parse" + + # is_from_me parsed correctly + assert ( + first_message.additional_kwargs["is_from_me"] is False + ), "is_from_me failed to parse" # short message content in attributedBody field assert (