fix(document_loaders/telegram): fix pandas calls + add tests (#4806)

# Fix Telegram API loader + add tests.
I was testing this integration and it was broken with the following error:
```python
message_threads = loader._get_message_threads(df)
KeyError: False
```
Also, this particular loader didn't have any tests / related group in
poetry, so I added those as well.

@hwchase17 / @eyurtsev please take a look at this fix PR.

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
Raduan Al-Shedivat
2023-05-16 23:35:25 +02:00
committed by GitHub
parent 206c87d525
commit 00c6ec8a2d
30 changed files with 137 additions and 42 deletions

View File

@@ -1,18 +0,0 @@
from pathlib import Path
from langchain.document_loaders import TelegramChatFileLoader
def test_telegram_chat_file_loader() -> None:
    """Load the example chat export and verify the single resulting document."""
    export_path = Path(__file__).parent.parent / "examples/telegram.json"
    source = str(export_path)
    expected_content = (
        "Henry on 2020-01-01T00:00:02: It's 2020...\n\n"
        "Henry on 2020-01-01T00:00:04: Fireworks!\n\n"
        "Grace 🧤 ðŸ\x8d on 2020-01-01T00:00:05: You're a minute late!\n\n"
    )

    documents = TelegramChatFileLoader(source).load()

    # The whole export is expected to come back as exactly one document.
    assert len(documents) == 1
    document = documents[0]
    # The loader is expected to record the path it was given as the source.
    assert document.metadata["source"] == source
    assert document.page_content == expected_content

View File

@@ -0,0 +1,34 @@
[
{
"sender_id": -1111111,
"text": "Hello, world!",
"date": "2023-05-15T19:30:49+00:00",
"message.id": 1785,
"is_reply": false,
"reply_to_id": null
},
{
"sender_id": -1111111,
"text": "Telegram is the best!",
"date": "2023-05-08T20:17:10+00:00",
"message.id": 1784,
"is_reply": true,
"reply_to_id": 1783
},
{
"sender_id": -1111111,
"text": "Langchain is great.",
"date": "2023-05-03T23:43:33+00:00",
"message.id": 1783,
"is_reply": true,
"reply_to_id": 1782
},
{
"sender_id": -1111111,
"text": "LLMs are awesome!",
"date": "2023-05-03T15:32:25+00:00",
"message.id": 1782,
"is_reply": false,
"reply_to_id": null
}
]

View File

@@ -0,0 +1,36 @@
from pathlib import Path
import pytest
from langchain.document_loaders import TelegramChatApiLoader, TelegramChatFileLoader
def test_telegram_chat_file_loader() -> None:
    """Verify TelegramChatFileLoader output for the bundled telegram.json fixture."""
    fixture = Path(__file__).parent / "test_docs" / "telegram.json"
    fixture_str = str(fixture)
    expected_text = (
        "Henry on 2020-01-01T00:00:02: It's 2020...\n\n"
        "Henry on 2020-01-01T00:00:04: Fireworks!\n\n"
        "Grace 🧤 ðŸ\x8d on 2020-01-01T00:00:05: You're a minute late!\n\n"
    )

    loaded = TelegramChatFileLoader(fixture_str).load()

    # All messages in the fixture should end up in a single document.
    assert len(loaded) == 1
    only_doc = loaded[0]
    # The source metadata should echo the path handed to the loader.
    assert only_doc.metadata["source"] == fixture_str
    assert only_doc.page_content == expected_text
@pytest.mark.requires("telethon", "pandas")
def test_telegram_channel_loader_parsing() -> None:
    """Test that TelegramChatApiLoader parses a saved channel dump into one document."""
    file_path = Path(__file__).parent / "test_docs" / "telegram_channel.json"
    # Only a file path is passed here; per the original author's note, the loader
    # then skips fetching from Telegram, so this exercises only the parsing logic.
    loader = TelegramChatApiLoader(file_path=str(file_path))
    docs = loader.load()
    assert len(docs) == 1
    # No debug print needed: on mismatch the assert below reports the actual content.
    assert docs[0].page_content == (
        "Hello, world!.\nLLMs are awesome! Langchain is great. Telegram is the best!."
    )