langchain/libs/community/tests/unit_tests/chat_loaders/test_telegram.py
Bagatur a0c2281540
infra: update mypy 1.10, ruff 0.5 (#23721)
```python
"""python scripts/update_mypy_ruff.py"""
import glob
import tomllib
from pathlib import Path

import toml
import subprocess
import re

# Parent of the directory that contains this script; per the module docstring
# the script is run as scripts/update_mypy_ruff.py, so this is the directory
# that holds scripts/ and (as globbed in main) libs/.
ROOT_DIR = Path(__file__).parents[1]


# Matches mypy error lines such as "pkg/mod.py:12: error: message  [code]".
# Groups: (file path, line number, error code).
_MYPY_ERROR_RE = re.compile(r"^(.*):(\d+): error:.*\[(.*)\]")


def _collect_mypy_errors(stdout: str) -> dict[str, dict[int, list[str]]]:
    """Group mypy error codes from ``stdout`` by file path and line number.

    Args:
        stdout: Captured standard output of a ``mypy`` run.

    Returns:
        Mapping ``{file path: {1-based line number: [error codes]}}``. Codes
        keep first-seen order and are not repeated for the same line.
    """
    errors: dict[str, dict[int, list[str]]] = {}
    for log_line in stdout.split("\n"):
        match = _MYPY_ERROR_RE.match(log_line)
        if match is None:
            continue
        error_path, line_no, error_type = match.groups()
        codes = errors.setdefault(error_path, {}).setdefault(int(line_no), [])
        if error_type not in codes:
            codes.append(error_type)
    return errors


def _add_type_ignores(cwd: str, errors: dict[str, dict[int, list[str]]]) -> None:
    """Append ``# type: ignore[...]`` comments to every reported line.

    Each file is read and written once, regardless of how many of its lines
    were reported. Files that no longer exist and line numbers outside the
    file are skipped.

    Args:
        cwd: Directory the mypy-reported paths are relative to.
        errors: Mapping produced by ``_collect_mypy_errors``.
    """
    for error_path, codes_by_line in errors.items():
        full_path = f"{cwd}/{error_path}"
        try:
            with open(full_path, "r", encoding="utf-8") as f:
                file_lines = f.readlines()
        except FileNotFoundError:
            continue
        for line_no, error_types in codes_by_line.items():
            if not 1 <= line_no <= len(file_lines):
                continue
            all_errors = ", ".join(error_types)
            # rstrip instead of [:-1]: the last line of a file may have no
            # trailing newline, and slicing would cut a real character.
            file_lines[line_no - 1] = (
                file_lines[line_no - 1].rstrip("\n")
                + f"  # type: ignore[{all_errors}]\n"
            )
        with open(full_path, "w", encoding="utf-8") as f:
            f.writelines(file_lines)


def main() -> None:
    """Bump mypy/ruff in every ``libs/**/pyproject.toml`` and silence new errors.

    For each pyproject file under ``ROOT_DIR / "libs"`` that declares both a
    ``typing`` group with ``mypy`` and a ``lint`` group with ``ruff``:

    1. set the version constraints to ``^1.10`` and ``^0.5`` and rewrite the
       file;
    2. re-lock, install, and run mypy in that package directory;
    3. append ``# type: ignore[<codes>]`` to every line mypy reported;
    4. run ruff format and import sorting.

    Packages missing either dependency group are skipped without changes.
    """
    for path in glob.glob(str(ROOT_DIR / "libs/**/pyproject.toml"), recursive=True):
        print(path)
        with open(path, "rb") as f:
            pyproject = tomllib.load(f)
        try:
            pyproject["tool"]["poetry"]["group"]["typing"]["dependencies"]["mypy"] = (
                "^1.10"
            )
            pyproject["tool"]["poetry"]["group"]["lint"]["dependencies"]["ruff"] = (
                "^0.5"
            )
        except KeyError:
            # Nothing has been written yet, so the package is left untouched.
            continue
        with open(path, "w") as f:
            toml.dump(pyproject, f)

        cwd = str(Path(path).parent)
        # The commands are chained with ";" so each runs even if an earlier
        # one exits non-zero (mypy is expected to report errors here).
        completed = subprocess.run(
            "poetry lock --no-update; poetry install --with typing; poetry run mypy . --no-color",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )

        errors = _collect_mypy_errors(completed.stdout)
        # Number of distinct (file, line) locations that will get an ignore.
        print(sum(len(codes_by_line) for codes_by_line in errors.values()))
        _add_type_ignores(cwd, errors)

        # NOTE(review): recent ruff releases may require the explicit
        # `ruff check` subcommand for the import-sorting step -- confirm
        # against the version pinned above.
        subprocess.run(
            "poetry run ruff format .; poetry run ruff --select I --fix .",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )


# Run the update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

```
2024-07-03 10:33:27 -07:00

99 lines
3.4 KiB
Python

"""Test the telegram chat loader."""
import pathlib
import tempfile
import zipfile
from typing import Sequence
import pytest
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_community.chat_loaders import telegram, utils
def _assert_messages_are_equal(
    actual_messages: Sequence[BaseMessage],
    expected_messages: Sequence[BaseMessage],
) -> None:
    """Assert both sequences have equal length, content and sender per message.

    Only ``content`` and ``additional_kwargs["sender"]`` are compared; other
    fields (such as the ``events`` entries) are not checked.
    """
    assert len(actual_messages) == len(expected_messages)
    for got, want in zip(actual_messages, expected_messages):
        assert got.content == want.content
        got_sender = got.additional_kwargs["sender"]
        want_sender = want.additional_kwargs["sender"]
        assert got_sender == want_sender
def _check_telegram_chat_loader(path: str) -> None:
    """Load the fixture at ``path`` and check the single resulting session.

    ``path`` is relative to the ``data`` directory next to this test file.
    When it ends in ``.zip`` the archive is built on the fly, in a temporary
    directory, from the directory of the same name without the suffix.
    """
    data_root = pathlib.Path(__file__).parent / "data"

    with tempfile.TemporaryDirectory() as scratch:
        if path.endswith(".zip"):
            # Build the archive from the unzipped fixture directory.
            export_dir = data_root / path.replace(".zip", "")
            source_path = pathlib.Path(scratch) / "telegram_chat.zip"
            with zipfile.ZipFile(source_path, "w") as archive:
                for member in export_dir.iterdir():
                    archive.write(member, arcname=member.name)
        else:
            source_path = data_root / path

        loader = telegram.TelegramChatLoader(str(source_path))
        merged_sessions = utils.merge_chat_runs(loader.lazy_load())
        # Materialise while the temporary archive still exists.
        chat_sessions = list(
            utils.map_ai_messages(merged_sessions, sender="Batman & Robin")
        )

    assert len(chat_sessions) == 1
    session = chat_sessions[0]
    assert len(session["messages"]) > 0
    assert session["messages"][0].content == "i refuse to converse with you"

    # (message class, content, sender, message time) for each expected message.
    expected_rows = [
        (
            HumanMessage,
            "i refuse to converse with you",
            "Jimmeny Marvelton",
            "23.08.2023 13:11:23 UTC-08:00",
        ),
        (
            AIMessage,
            "Hi nemesis",
            "Batman & Robin",
            "23.08.2023 13:13:20 UTC-08:00",
        ),
        (
            HumanMessage,
            "we meet again\n\nyou will not trick me this time",
            "Jimmeny Marvelton",
            "23.08.2023 13:15:35 UTC-08:00",
        ),
    ]
    expected_content = [
        message_cls(
            content=text,
            additional_kwargs={
                "sender": sender,
                "events": [{"message_time": sent_at}],
            },
        )
        for message_cls, text, sender, sent_at in expected_rows
    ]
    _assert_messages_are_equal(session["messages"], expected_content)
@pytest.mark.parametrize(
    "path",
    [
        "telegram_chat_json",
        "telegram_chat_json.zip",
        "telegram_chat_json/result.json",
    ],
)
def test_telegram_chat_loader(path: str) -> None:
    """Check the loader on the JSON export given as a directory, zip, or file."""
    _check_telegram_chat_loader(path)
@pytest.mark.skip(reason="requires bs4 but marking it as such doesn't seem to work")
@pytest.mark.parametrize(
    "path",
    [
        "telegram_chat_json",
        "telegram_chat_json.zip",
        "telegram_chat_json/result.json",
    ],
)
def test_telegram_chat_loader_html(path: str) -> None:
    """Run the shared loader check; currently always skipped.

    NOTE(review): despite the name, the parameters are the same JSON fixture
    paths used by ``test_telegram_chat_loader`` -- confirm whether HTML
    fixtures were intended.
    """
    _check_telegram_chat_loader(path)