def _get_session_query(self, use_chat_handle_table: bool) -> str:
    """Return the SQL used to load every message of a single chat.

    The statement selects ``message.date``, ``handle.id``, ``message.text``,
    ``message.is_from_me`` and ``message.attributedBody`` for one chat,
    ordered by ascending ``message.date``. It contains exactly one ``?``
    placeholder, which the caller binds to the chat id.

    Args:
        use_chat_handle_table: When truthy, ``handle`` is reached through
            the ``chat_handle_join`` table, which messages sent before
            OSX 12 require. When falsy, ``handle`` is joined directly on
            ``message.handle_id``, because ``chat_handle_join`` does not
            exist in databases created with OSX 12 or above.

    Returns:
        The SQL text of the query.

    NOTE(review): the ``chat_handle_join`` route matches handles by chat id
    rather than by message, so a chat with several rows in that table would
    presumably return each message once per handle -- confirm for group
    chats.
    """
    if use_chat_handle_table:
        # Older layout: chat -> chat_handle_join -> handle.
        handle_join_clause = """
        JOIN chat_handle_join ON
            chat_message_join.chat_id = chat_handle_join.chat_id
        JOIN handle ON
            handle.ROWID = chat_handle_join.handle_id"""
    else:
        # Newer layout: the message row points straight at its handle.
        handle_join_clause = """
        JOIN handle ON message.handle_id = handle.ROWID
        """

    return f"""
        SELECT  message.date,
                handle.id,
                message.text,
                message.is_from_me,
                message.attributedBody
        FROM message
        JOIN chat_message_join ON
            message.ROWID = chat_message_join.message_id
        {handle_join_clause}
        WHERE chat_message_join.chat_id = ?
        ORDER BY message.date ASC;
    """
def test_imessage_chat_loader_upgrade_osx11() -> None:
    """Load the ``imessage_chat_upgrade_osx_11.db`` fixture and check message one.

    Verifies that the loader yields at least one session with at least one
    message, and that the first message of the first session has the
    expected text, raw timestamp, parsed timestamp and ``is_from_me`` flag.
    The fixture is presumably a database upgraded from OSX 11 (per its file
    name) -- confirm against the fixture's provenance.
    """
    data_dir = pathlib.Path(__file__).parent / "data"
    db_file = data_dir / "imessage_chat_upgrade_osx_11.db"
    chat_loader = imessage.IMessageChatLoader(str(db_file))

    sessions = list(
        utils.map_ai_messages(chat_loader.lazy_load(), sender="testemail@gmail.com")
    )
    assert sessions, "Chat sessions should not be empty"

    messages = sessions[0]["messages"]
    assert messages, "Chat messages should not be empty"

    opening = messages[0]

    # The body must have been read from the message's text field.
    assert "Yeh" in opening.content, "Chat content mismatch"

    extras = opening.additional_kwargs

    # Raw timestamp is passed through unchanged.
    assert extras["message_time"] == 720845450393148160, "unexpected time"

    # ...and is also exposed as a parsed datetime.
    assert extras["message_time_as_datetime"] == datetime.datetime(
        2023, 11, 5, 2, 50, 50, 393148
    ), "date failed to parse"

    # The sender flag must come back as a real boolean False.
    assert extras["is_from_me"] is False, "is_from_me failed to parse"