mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 13:36:15 +00:00
fix(document_loaders/telegram): fix pandas calls + add tests (#4806)
# Fix Telegram API loader + add tests. I was testing this integration and it was broken with the following error: ```python message_threads = loader._get_message_threads(df) KeyError: False ``` Also, this particular loader didn't have any tests / related group in poetry, so I added those as well. @hwchase17 / @eyurtsev please take a look at this fix PR. --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
committed by
GitHub
parent
206c87d525
commit
00c6ec8a2d
@@ -1,18 +0,0 @@
|
||||
from pathlib import Path
|
||||
|
||||
from langchain.document_loaders import TelegramChatFileLoader
|
||||
|
||||
|
||||
def test_telegram_chat_file_loader() -> None:
    """Load the example Telegram export and verify the resulting document.

    The test expects exactly one document whose ``source`` metadata is the
    path that was handed to the loader and whose content matches the
    expected transcript below.
    """
    export_path = str(Path(__file__).parent.parent / "examples/telegram.json")
    expected_content = (
        "Henry on 2020-01-01T00:00:02: It's 2020...\n\n"
        "Henry on 2020-01-01T00:00:04: Fireworks!\n\n"
        "Grace 🧤 ðŸ\x8d’ on 2020-01-01T00:00:05: You're a minute late!\n\n"
    )

    documents = TelegramChatFileLoader(export_path).load()

    # The whole export is expected to come back as a single document.
    assert len(documents) == 1
    only_doc = documents[0]
    assert only_doc.metadata["source"] == export_path
    assert only_doc.page_content == expected_content
|
@@ -0,0 +1,34 @@
|
||||
[
|
||||
{
|
||||
"sender_id": -1111111,
|
||||
"text": "Hello, world!",
|
||||
"date": "2023-05-15T19:30:49+00:00",
|
||||
"message.id": 1785,
|
||||
"is_reply": false,
|
||||
"reply_to_id": null
|
||||
},
|
||||
{
|
||||
"sender_id": -1111111,
|
||||
"text": "Telegram is the best!",
|
||||
"date": "2023-05-08T20:17:10+00:00",
|
||||
"message.id": 1784,
|
||||
"is_reply": true,
|
||||
"reply_to_id": 1783
|
||||
},
|
||||
{
|
||||
"sender_id": -1111111,
|
||||
"text": "Langchain is great.",
|
||||
"date": "2023-05-03T23:43:33+00:00",
|
||||
"message.id": 1783,
|
||||
"is_reply": true,
|
||||
"reply_to_id": 1782
|
||||
},
|
||||
{
|
||||
"sender_id": -1111111,
|
||||
"text": "LLMs are awesome!",
|
||||
"date": "2023-05-03T15:32:25+00:00",
|
||||
"message.id": 1782,
|
||||
"is_reply": false,
|
||||
"reply_to_id": null
|
||||
}
|
||||
]
|
36
tests/unit_tests/document_loaders/test_telegram.py
Normal file
36
tests/unit_tests/document_loaders/test_telegram.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.document_loaders import TelegramChatApiLoader, TelegramChatFileLoader
|
||||
|
||||
|
||||
def test_telegram_chat_file_loader() -> None:
    """Check TelegramChatFileLoader against the bundled ``telegram.json`` file.

    The test expects exactly one document whose ``source`` metadata is the
    path that was handed to the loader and whose content matches the
    expected transcript below.
    """
    fixture = str(Path(__file__).parent / "test_docs" / "telegram.json")
    expected_transcript = (
        "Henry on 2020-01-01T00:00:02: It's 2020...\n\n"
        "Henry on 2020-01-01T00:00:04: Fireworks!\n\n"
        "Grace 🧤 ðŸ\x8d’ on 2020-01-01T00:00:05: You're a minute late!\n\n"
    )

    loaded = TelegramChatFileLoader(fixture).load()

    # The whole export is expected to come back as a single document.
    assert len(loaded) == 1
    document = loaded[0]
    assert document.metadata["source"] == fixture
    assert document.page_content == expected_transcript
|
||||
|
||||
|
||||
@pytest.mark.requires("telethon", "pandas")
def test_telegram_channel_loader_parsing() -> None:
    """Test TelegramChatApiLoader parsing of a locally stored channel dump.

    Only ``file_path`` is passed to the loader; the test then checks that
    ``load()`` yields exactly one document with the expected page content.
    """
    file_path = Path(__file__).parent / "test_docs" / "telegram_channel.json"
    # if we don't provide any value, it will skip fetching from telegram
    # and will check the parsing logic.
    loader = TelegramChatApiLoader(file_path=str(file_path))
    docs = loader.load()

    assert len(docs) == 1
    # No debug print here: on failure the assertion below already reports
    # the actual page content.
    assert docs[0].page_content == (
        "Hello, world!.\nLLMs are awesome! Langchain is great. Telegram is the best!."
    )
|
Reference in New Issue
Block a user