mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-17 18:23:59 +00:00
Fix WhatsAppChatLoader: enable parsing of additional formats (#6663)
- Description: Updated the regex to support a new format that was observed when a WhatsApp chat was exported.
- Issue: #6654
- Dependencies: No new dependencies
- Tag maintainer: @rlancemartin, @eyurtsev
This commit is contained in:
parent
3e30a5d967
commit
afc292e58d
@ -40,7 +40,7 @@ class WhatsAppChatLoader(BaseLoader):
|
|||||||
(?:
|
(?:
|
||||||
:\d{2}
|
:\d{2}
|
||||||
)?
|
)?
|
||||||
(?:[ _](?:AM|PM))?
|
(?:[\s_](?:AM|PM))?
|
||||||
)
|
)
|
||||||
\]?
|
\]?
|
||||||
[\s-]*
|
[\s-]*
|
||||||
@ -50,7 +50,9 @@ class WhatsAppChatLoader(BaseLoader):
|
|||||||
(.+)
|
(.+)
|
||||||
"""
|
"""
|
||||||
for line in lines:
|
for line in lines:
|
||||||
result = re.match(message_line_regex, line.strip(), flags=re.VERBOSE)
|
result = re.match(
|
||||||
|
message_line_regex, line.strip(), flags=re.VERBOSE | re.IGNORECASE
|
||||||
|
)
|
||||||
if result:
|
if result:
|
||||||
date, sender, text = result.groups()
|
date, sender, text = result.groups()
|
||||||
text_content += concatenate_rows(date, sender, text)
|
text_content += concatenate_rows(date, sender, text)
|
||||||
|
@ -18,4 +18,6 @@ def test_whatsapp_chat_loader() -> None:
|
|||||||
"User 1 on 1/23/23, 3:22_AM: And let me know if anything changes\n\n"
|
"User 1 on 1/23/23, 3:22_AM: And let me know if anything changes\n\n"
|
||||||
"~ User name 2 on 1/24/21, 12:41:03 PM: Of course!\n\n"
|
"~ User name 2 on 1/24/21, 12:41:03 PM: Of course!\n\n"
|
||||||
"~ User 2 on 2023/5/4, 16:13:23: See you!\n\n"
|
"~ User 2 on 2023/5/4, 16:13:23: See you!\n\n"
|
||||||
|
"User 1 on 7/19/22, 11:32 PM: Hello\n\n"
|
||||||
|
"User 2 on 7/20/22, 11:32 am: Goodbye\n\n"
|
||||||
)
|
)
|
||||||
|
@ -4,3 +4,5 @@
|
|||||||
1/23/23, 3:22_AM - User 1: And let me know if anything changes
|
1/23/23, 3:22_AM - User 1: And let me know if anything changes
|
||||||
[1/24/21, 12:41:03 PM] ~ User name 2: Of course!
|
[1/24/21, 12:41:03 PM] ~ User name 2: Of course!
|
||||||
[2023/5/4, 16:13:23] ~ User 2: See you!
|
[2023/5/4, 16:13:23] ~ User 2: See you!
|
||||||
|
7/19/22, 11:32 PM - User 1: Hello
|
||||||
|
7/20/22, 11:32 am - User 2: Goodbye
|
||||||
|
Loading…
Reference in New Issue
Block a user