mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-16 09:48:04 +00:00
Enhancement: Ignore deleted messages and media in WhatsAppChatLoader (#6839)
- Description: Ignore deleted messages and media
- Issue: #6838
- Dependencies: No new dependencies
- Tag maintainer: @rlancemartin, @eyurtsev
This commit is contained in:
parent
74848aafea
commit
a980095efc
@@ -49,13 +49,15 @@ class WhatsAppChatLoader(BaseLoader):
|
||||
\s
|
||||
(.+)
|
||||
"""
|
||||
ignore_lines = ["This message was deleted", "<Media omitted>"]
|
||||
for line in lines:
|
||||
result = re.match(
|
||||
message_line_regex, line.strip(), flags=re.VERBOSE | re.IGNORECASE
|
||||
)
|
||||
if result:
|
||||
date, sender, text = result.groups()
|
||||
text_content += concatenate_rows(date, sender, text)
|
||||
if text not in ignore_lines:
|
||||
text_content += concatenate_rows(date, sender, text)
|
||||
|
||||
metadata = {"source": str(p)}
|
||||
|
||||
|
@@ -6,3 +6,5 @@
|
||||
[2023/5/4, 16:13:23] ~ User 2: See you!
|
||||
7/19/22, 11:32 PM - User 1: Hello
|
||||
7/20/22, 11:32 am - User 2: Goodbye
|
||||
4/20/23, 9:42 am - User 3: <Media omitted>
|
||||
6/29/23, 12:16 am - User 4: This message was deleted
|
||||
|
Loading…
Reference in New Issue
Block a user