From 76923e5743d1f0ce1131d756e60376fe26a07cce Mon Sep 17 00:00:00 2001 From: Igor Dvorkin Date: Mon, 1 Jan 2024 16:04:14 -0800 Subject: [PATCH] Restore self message sent before OSX 12 Monterey (#14818) --- .../chat_loaders/imessage.py | 52 ++++++++++++++---- .../data/imessage_chat_upgrade_osx_11.db | Bin 0 -> 53248 bytes .../unit_tests/chat_loaders/test_imessage.py | 35 ++++++++++++ 3 files changed, 77 insertions(+), 10 deletions(-) create mode 100644 libs/community/tests/unit_tests/chat_loaders/data/imessage_chat_upgrade_osx_11.db diff --git a/libs/community/langchain_community/chat_loaders/imessage.py b/libs/community/langchain_community/chat_loaders/imessage.py index 8fc22f9c097..0e534f65d39 100644 --- a/libs/community/langchain_community/chat_loaders/imessage.py +++ b/libs/community/langchain_community/chat_loaders/imessage.py @@ -91,8 +91,38 @@ class IMessageChatLoader(BaseChatLoader): length, start = int.from_bytes(content[1:3], "little"), 3 return content[start : start + length].decode("utf-8", errors="ignore") + def _get_session_query(self, use_chat_handle_table: bool) -> str: + # Messages sent pre OSX 12 require a join through the chat_handle_join table + # However, the table doesn't exist if database created with OSX 12 or above. + + joins_w_chat_handle = """ + JOIN chat_handle_join ON + chat_message_join.chat_id = chat_handle_join.chat_id + JOIN handle ON + handle.ROWID = chat_handle_join.handle_id""" + + joins_no_chat_handle = """ + JOIN handle ON message.handle_id = handle.ROWID + """ + + joins = joins_w_chat_handle if use_chat_handle_table else joins_no_chat_handle + + return f""" + SELECT message.date, + handle.id, + message.text, + message.is_from_me, + message.attributedBody + FROM message + JOIN chat_message_join ON + message.ROWID = chat_message_join.message_id + {joins} + WHERE chat_message_join.chat_id = ? + ORDER BY message.date ASC; + """ + def _load_single_chat_session( - self, cursor: "sqlite3.Cursor", chat_id: int + self, cursor: "sqlite3.Cursor", use_chat_handle_table: bool, chat_id: int ) -> ChatSession: """ Load a single chat session from the iMessage chat.db. @@ -106,14 +136,7 @@ class IMessageChatLoader(BaseChatLoader): """ results: List[HumanMessage] = [] - query = """ - SELECT message.date, handle.id, message.text, message.is_from_me, message.attributedBody - FROM message - JOIN chat_message_join ON message.ROWID = chat_message_join.message_id - JOIN handle ON message.handle_id = handle.ROWID - WHERE chat_message_join.chat_id = ? - ORDER BY message.date ASC; - """ # noqa: E501 + query = self._get_session_query(use_chat_handle_table) cursor.execute(query, (chat_id,)) messages = cursor.fetchall() @@ -165,6 +188,13 @@ class IMessageChatLoader(BaseChatLoader): ) from e cursor = conn.cursor() + # See if chat_handle_join table exists: + query = """SELECT name FROM sqlite_master + WHERE type='table' AND name='chat_handle_join';""" + + cursor.execute(query) + is_chat_handle_join_exists = cursor.fetchone() + # Fetch the list of chat IDs sorted by time (most recent first) query = """SELECT chat_id FROM message @@ -175,6 +205,8 @@ class IMessageChatLoader(BaseChatLoader): chat_ids = [row[0] for row in cursor.fetchall()] for chat_id in chat_ids: - yield self._load_single_chat_session(cursor, chat_id) + yield self._load_single_chat_session( + cursor, is_chat_handle_join_exists, chat_id + ) conn.close() diff --git a/libs/community/tests/unit_tests/chat_loaders/data/imessage_chat_upgrade_osx_11.db b/libs/community/tests/unit_tests/chat_loaders/data/imessage_chat_upgrade_osx_11.db new file mode 100644 index 0000000000000000000000000000000000000000..34a9d621d7e65d290dd48d704aaa65d658130c8f GIT binary patch literal 53248 zcmeHQ4RG7meFp`Jq9jwWom85wIYH9q!?BP61PKyZ;us*nSgA$Hmej;cdMrf3B5IOQ zNHC7FbdfFE?e%tYn`B**ZcV%Gc3JOsvbWu?&AMy0?$&+X<-=*~OEevS~=fBcJK>P;fl^!vqdrWofeoJ)768OkOMH3VP<#rcqjd9!~C4rD>I(mgGGuedYSw zSJKz^_c>RM+?9P}>1p*=b>ApltH;@NwSW8ly}iRX+z>c?&1%v`y;9NUn$m8*5NeTl z^L;1v#+BxzM(bH^y0-n{PxSN-kBkKVl(dttKUNQdYm%y8q_5pKar>A;*Na6@?i(AI z_U)%XuI{I$ndynKG(0t~rl&{gxvG()r`0>A>6!G{-WionPfXM4nLT?(X-mJ?vTu#j zWqrP&Wpy_~MNLUFd!}i2l&-EasW)9WDw%g&rJG=b`v;7YQOU19wC=WEG*{DJcke*iEM|&s z&eq-4i}Th(Ba7?qL8i2YwrOfr%Y*{X7ur2O4>z@@g073b8Jl&tDhm}$N13+kO`F_R zfJCis*{$-B z)v|dVj8Y{7G=|?rC{4@EI)Y`>ES5CLZJn2P@|vXqaV)4Gt#e=t>AC8B!N>v|Tb2W` z(5gAZya3oxrQ0yU=K0#&qcn6hikWux&9w4*xo(eJK{Ho2tMeJlunKy;w9XYRYeCI! z8*L>IWXn`6&8oHp(z%~A%79lJ3jtv0_s$z-)M&KInSIkqqvicNn=%*3X+x}HR?Dpo zrBG(hG6bSVIU202{; z2W^Prpp}7chjGAYo!@SCUiS3(4!UU396_}Ya#_bm*AM5MAa4dR1l&Xt*jiAoK zAADd0Faj6>i~vReBY+XW2w(&-0vG{|07d{K(D4XR=ZMhOp^0gM1f03(1A=vV|`Tz+L&i(omf3lYvZMgSv#5x@vw1TX>^0gM1f z03(1AzzARjFaog2zl$6p>rNQR|C#R(kXr(dAAnnX03VWi1);QodW(9EdZqh{E(L>z z5%7+{(jCiZ_TEX%fJX`N33THR4i~#Q$#+G za))0gYk^(0T5WTBN^%{-O@Yge(i~B%h0;?Kwg)-l=z|~xr_Qaw!?nPUp2>-ciCd4= zx{R?v54yb}J*617cPMZwM1Y79c(C@RFKx?Y4vvjGn{o6>aO~nHlm_o4$LbNMtHoKp z3~BbJr#_(-syd3`y4s_U6331m>u=2AsW-ZFbdl9ryhtpjB;&Ct6N^CMqtTSYNU|(4 zs-i@aEFTq8iFiNp)0@AwG)?pqZ+zam?T8!Bf_txu#j?)@y2$P+t(cvvR;-PHe|XbJ zh8@*{qfh5RHlX?VwH=%gjVBTb5ssyU%V#D*xiMS0OPlM;1*N)LZ78)7BF)0lq+Eh% zt6Y&2Re_bFOf(jaGf^ojG7`rn8Cii}|L>o|FI^usI(PKn-x2y( zfASdNo4#w^%V#Fs%DmLwT$ydP*0l`!ZLjZoWu8GSSXY@@DXwynXv;Kd^MIw79HoXkL8(#o^~azx|MaHs-oNCAz9~7zCn6Cp zqA+cf2S0?g znayg*BPjT{TA&?TjC2Am_}{qmahDDadiX~RE}n|URE3G8IB0N^$P9BV$8gCc&ntXX zg!XpnuH`f1E-jAS-%JbJAh^xKh3y0C2CTOaF%Nn0j~0Emo0@&(9TN0)0tv1j{N!IF z5?mesw*fcwjRZ@Fmd}iVg10%nx%-+Me9sL)y}@tQ0Pt*5V1)?BL^;;(%?V5b+5!ez zdg$gv)tC@hjLE!N(rLjzMTZ(uI(iOuf}SsrPLuvM>}otE$Rfuxs;UC+aW%n+(O80! z6=-o4NtD%uv{YO^vm11OiKF{@zjf~=4`8VMQ#%QHvA{y-wIELui9{K!_yo7c*DYmG>GE^BY+XW2w(&-0vG{|07d{KfD!oRMBqAd zhD;@eNGi&snIS&GM0qvIhQ$owU?zb{DA70{O~O@4AWt7d zqF!DXkt=&`(L|g}rBV`;l9Es?F%@OxWK3ifA)1P?svL{NRr2&LNK_IaDtesayf|0fu;Tb`S#1%0j$x)djPw%ot zJ-q!OS0<5YLgrI(5#mayi6o5Q0RaFB5QwrOuLvTSz;t56@!<1oD@y+YLpcOa^-0x>SgAL zTp2^65h=n+ViL-c5@3Qml7PZXF@+IQKqQG1*`%5xPv4A0IXTHiIAFD!hyn+=c#M%_ zl8nHO3MwZkYD6JdPT8V~csvnJcmTtl*B3m zusQ;7v4Ad!s)#+TAgRd+&@Lu{3O;L#Ce;KhvMEMU6vS^8-hf3Fj!~qz#HUh;6c>w= zE6>=X9$uc1D-*UTE5JM(ur8A1fddN1LS7ZfdIZ$VC)g;LN|GzjqxJv8)NX=$lX{hU znfeiRntF!%N9rl+8`KHv3F@oVBh(kD&ryriz0^FFr#?m9K}~`i~vReBk(JUKya8O`(UmrIAn*P4SvK9j|DHa!=u4V>~L3bvmM?N++>G4 zgB$Je<{)K z?6Jd%V7DEn;WCPEJlJK2W5IwO?hX$Q61~JYL5y$M+|Px!1~!w{wzp!2jCPR7y$=?r8{A>$m)K- z#&$5@Hurm3faG6$A5wMusBa4Dgl%ANKmX@H{AZ%S`{eV#m-zW&BlK;%U#)NX?6j-j zrNQR?oSwJ)oo~OlhjmE)>34J5q+fmf!9}_R`@RpEWh-Cb-Mg^^wtv0x?cnqOzSvK^ zed-6F;$dT_6Z)oUFALq4vdd?uR@uVXa-mpX8xUjT;`RqQ>^G6-89z^p-&-yf8V>Yi5+QtB8`J5fw z$YslUX7xA^D<#wVeHQKL#Xpq~G{x#@d!Q3){*x~}G3(#P&!w5=v&bRO<<2(V#vXm& ze71O5iabVFbe*mkW&d>B=f>%1wi7n%zwyl{U)i+iTfMD?m(L<^JWvb4d(YBPvwilZ zwf3+U**}F2Nu?ZJ*5@^&OlwAIZHqnB3HA2j-~8>Tpv8XU8-Mo$-3@)ywwJZ>wHueu zHu>vmymPht=YBB4OCJ1VMACNCQY{^Tu3NRFSvt69sbtH#UZRZ(oh@tGgL;mxm~>Vv zy}0-&v>yd+ft~}0TNT^Q&mp}~aEyc03H15G4Etl3HVqp2MjtO5VZy!NCxXO7MED;D z4uyWFhwAzZIJvk-Z3Mg&fQ=K^**N5J%rm3UvIL^kc_Q>;%K^xEj4UnZC`;+g(j74=sM7Xw2oU1^+6<;eWd5#@?;w@3Xn7 z&#OX-I-z*d^pEA6p?V#jaaxJUyKW_Fw;exmGf;i%y+&|>`8O~7pU3Fi zrSvpCHcd}Y(6UO8tMrtbR>sn|($l-grs&DBq<_rMy=!am9y&vgz0Y+5AwP5Jk?;Cv z?`yH+CvE~dJ>U$P?t0gJya1iNI270oLvIs2tZGGi&P1ao%Y-RFnBb|HMV&_et?9Z8 z`pWh6*j&kkF%?+Z;`RQKOnlFIaF`=@0&|R>;A{Swvhct;g6v%+!msx(_SG)#z#pfM z5x@vw1TX>^fnOB_mL52MvKM-8*VsKbcgtMRugPvNeu$)kVbX`RiapQmUvG?@IH)`Q zst)kI+p*7+y2KiLKKBoqi8-hICB=Xi0m z)<*4H(o^GAOPej|?$>Q{?q_s7eO!lfW2k>)r}=w3zYE{nxpI$r54_WRaq;(CzY}?J z@edppUG~o2IXsU(#`DM{@AYFm5W#h|FMrvukMVRrwR`l;R~Gww_HLiM`fF}DeXPA+ZOExN9PWpGdYyCkZ@l@# zJ|K4vS)O&i-ak;w0~VqU@jP*B?T4GNy@Ag)d9pXX$4>@9^Ugf$5%&d}Uu}+;JnYep zf@U{5Xr|Tv`jw9>g}Rk{>FXS;(fv-V@nQRfSNF;9PrZD~|F*}A$wBM?1Jqjt{KE%E o03(1AzzARjFaj6>i~vReBY+XW2w(&-0>5ks^aRMhuB}7=5534er~m)} literal 0 HcmV?d00001 diff --git a/libs/community/tests/unit_tests/chat_loaders/test_imessage.py b/libs/community/tests/unit_tests/chat_loaders/test_imessage.py index 26ea6303104..4f6bc171730 100644 --- a/libs/community/tests/unit_tests/chat_loaders/test_imessage.py +++ b/libs/community/tests/unit_tests/chat_loaders/test_imessage.py @@ -4,6 +4,41 @@ import pathlib from langchain_community.chat_loaders import imessage, utils +def test_imessage_chat_loader_upgrade_osx11() -> None: + chat_path = ( + pathlib.Path(__file__).parent / "data" / "imessage_chat_upgrade_osx_11.db" + ) + loader = imessage.IMessageChatLoader(str(chat_path)) + + chat_sessions = list( + utils.map_ai_messages(loader.lazy_load(), sender="testemail@gmail.com") + ) + assert chat_sessions, "Chat sessions should not be empty" + + assert chat_sessions[0]["messages"], "Chat messages should not be empty" + + first_message = chat_sessions[0]["messages"][0] + # message content in text field + assert "Yeh" in first_message.content, "Chat content mismatch" + + # time parsed correctly + expected_message_time = 720845450393148160 + assert ( + first_message.additional_kwargs["message_time"] == expected_message_time + ), "unexpected time" + + expected_parsed_time = datetime.datetime(2023, 11, 5, 2, 50, 50, 393148) + assert ( + first_message.additional_kwargs["message_time_as_datetime"] + == expected_parsed_time + ), "date failed to parse" + + # is_from_me parsed correctly + assert ( + first_message.additional_kwargs["is_from_me"] is False + ), "is_from_me failed to parse" + + def test_imessage_chat_loader() -> None: chat_path = pathlib.Path(__file__).parent / "data" / "imessage_chat.db" loader = imessage.IMessageChatLoader(str(chat_path))