Fix WhatsAppChatLoader: Enable parsing of additional formats (#6663)

- Description: Updated the regex to support a new format that was observed
in exported WhatsApp chats.
  - Issue: #6654
  - Dependencies: No new dependencies
  - Tag maintainer: @rlancemartin, @eyurtsev
This commit is contained in:
Augustine Theodore 2023-06-26 00:38:43 +05:30 committed by GitHub
parent 3e30a5d967
commit afc292e58d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 3 deletions

View File

@ -40,7 +40,7 @@ class WhatsAppChatLoader(BaseLoader):
(?:
:\d{2}
)?
(?:[ _](?:AM|PM))?
(?:[\s_](?:AM|PM))?
)
\]?
[\s-]*
@ -50,7 +50,9 @@ class WhatsAppChatLoader(BaseLoader):
(.+)
"""
for line in lines:
result = re.match(message_line_regex, line.strip(), flags=re.VERBOSE)
result = re.match(
message_line_regex, line.strip(), flags=re.VERBOSE | re.IGNORECASE
)
if result:
date, sender, text = result.groups()
text_content += concatenate_rows(date, sender, text)

View File

@ -18,4 +18,6 @@ def test_whatsapp_chat_loader() -> None:
"User 1 on 1/23/23, 3:22_AM: And let me know if anything changes\n\n"
"~ User name 2 on 1/24/21, 12:41:03 PM: Of course!\n\n"
"~ User 2 on 2023/5/4, 16:13:23: See you!\n\n"
"User 1 on 7/19/22, 11:32PM: Hello\n\n"
"User 2 on 7/20/22, 11:32am: Goodbye\n\n"
)

View File

@ -3,4 +3,6 @@
1/23/23, 3:19 AM - User 2: Bye!
1/23/23, 3:22_AM - User 1: And let me know if anything changes
[1/24/21, 12:41:03 PM] ~ User name 2: Of course!
[2023/5/4, 16:13:23] ~ User 2: See you!
[2023/5/4, 16:13:23] ~ User 2: See you!
7/19/22, 11:32PM - User 1: Hello
7/20/22, 11:32am - User 2: Goodbye