langchain/libs/community/tests/integration_tests/embeddings/test_titan_takeoff.py
Bagatur a0c2281540
infra: update mypy 1.10, ruff 0.5 (#23721)
```python
"""python scripts/update_mypy_ruff.py"""
import glob
import tomllib
from pathlib import Path

import toml
import subprocess
import re

# Parent of the directory that contains this script; main() searches "libs/"
# beneath it. Presumably the repository root -- confirm against where the script
# is checked in.
ROOT_DIR = Path(__file__).parents[1]


def main():
    """Bump mypy and ruff in every library and silence newly reported mypy errors.

    For each ``pyproject.toml`` found recursively under ``ROOT_DIR / "libs"``:

    1. Set ``mypy`` to ``^1.10`` in the ``typing`` dependency group and ``ruff``
       to ``^0.5`` in the ``lint`` group, then rewrite the file. Projects that
       lack either group are skipped without being modified.
    2. Run ``poetry lock``, ``poetry install`` and ``mypy`` in the project
       directory and parse mypy's standard output.
    3. Append ``# type: ignore[<codes>]`` to every source line mypy reported an
       error on. Files mypy mentions that cannot be found are skipped.
    4. Run ``ruff format`` and ruff's import-sorting fix over the project.

    Source files and ``pyproject.toml`` files are modified in place.
    """
    # mypy reports errors as "<file>:<line>: error: <message>  [<code>]".
    # Raw string: "\d" and "\[" are invalid escapes in a normal string literal.
    error_pattern = re.compile(r"^(.*):(\d+): error:.*\[(.*)\]")

    for path in glob.glob(str(ROOT_DIR / "libs/**/pyproject.toml"), recursive=True):
        print(path)
        with open(path, "rb") as f:
            pyproject = tomllib.load(f)
        try:
            pyproject["tool"]["poetry"]["group"]["typing"]["dependencies"]["mypy"] = (
                "^1.10"
            )
            pyproject["tool"]["poetry"]["group"]["lint"]["dependencies"]["ruff"] = (
                "^0.5"
            )
        except KeyError:
            # No typing/lint dependency group here: leave the project untouched.
            continue
        with open(path, "w") as f:
            toml.dump(pyproject, f)

        cwd = str(Path(path).parent)
        completed = subprocess.run(
            "poetry lock --no-update; poetry install --with typing; poetry run mypy . --no-color",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )

        # Map (file, line number) -> distinct error codes, in first-seen order.
        to_ignore = {}
        for log_line in completed.stdout.splitlines():
            match = error_pattern.match(log_line)
            if match is None:
                continue
            error_path, line_no, error_type = match.groups()
            codes = to_ignore.setdefault((error_path, int(line_no)), [])
            # mypy can report the same code several times for one line; listing
            # it once in the ignore comment is enough.
            if error_type not in codes:
                codes.append(error_type)
        print(len(to_ignore))

        for (error_path, line_no), error_types in to_ignore.items():
            all_errors = ", ".join(error_types)
            full_path = f"{cwd}/{error_path}"
            try:
                with open(full_path, "r") as f:
                    file_lines = f.readlines()
            except FileNotFoundError:
                continue
            if not 1 <= line_no <= len(file_lines):
                # The reported line does not exist in the file; an unchecked
                # index would raise or silently annotate the wrong line.
                continue
            # rstrip rather than [:-1]: the last line of a file may have no
            # trailing newline, in which case [:-1] would drop a real character.
            flagged = file_lines[line_no - 1].rstrip("\n")
            file_lines[line_no - 1] = f"{flagged}  # type: ignore[{all_errors}]\n"
            with open(full_path, "w") as f:
                f.writelines(file_lines)

        # "ruff check" is spelled out because the bare "ruff <path>" form is not
        # accepted by the ruff 0.5 release this script installs.
        subprocess.run(
            "poetry run ruff format .; poetry run ruff check --select I --fix .",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )


# Run the update only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

```
2024-07-03 10:33:27 -07:00

185 lines
6.1 KiB
Python

"""Test Titan Takeoff Embedding wrapper."""
import json
from typing import Any
import pytest
from langchain_community.embeddings import TitanTakeoffEmbed
from langchain_community.embeddings.titan_takeoff import (
Device,
MissingConsumerGroup,
ReaderConfig,
)
@pytest.mark.requires("pytest_httpx")
@pytest.mark.requires("takeoff_client")
def test_titan_takeoff_call(httpx_mock: Any) -> None:
    """Embed a document list and a query against a mocked ``/embed`` endpoint."""
    port = 2345
    httpx_mock.add_response(
        method="POST",
        url=f"http://localhost:{port}/embed",
        json={"result": [0.46635, 0.234, -0.8521]},
    )

    embedding = TitanTakeoffEmbed(port=port)
    documents_result = embedding.embed_documents(["What is 2 + 2?"], "primary")
    query_result = embedding.embed_query("What is 2 + 2?", "primary")

    assert isinstance(documents_result, list)
    assert isinstance(query_result, list)

    # One request per embedding call, each carrying the prompt to the endpoint.
    requests = httpx_mock.get_requests()
    assert len(requests) == 2
    for request in requests:
        assert request.url == f"http://localhost:{port}/embed"
        assert json.loads(request.content)["text"] == "What is 2 + 2?"
@pytest.mark.requires("pytest_httpx")
@pytest.mark.requires("takeoff_client")
def test_no_consumer_group_fails(httpx_mock: Any) -> None:
    """Check that embedding calls raise when no consumer group is given."""
    port = 2345
    embed_url = f"http://localhost:{port}/embed"
    httpx_mock.add_response(
        method="POST",
        url=embed_url,
        json={"result": [0.46635, 0.234, -0.8521]},
    )
    embedding = TitanTakeoffEmbed(port=port)

    # Omitting the consumer group must be rejected for both entry points.
    with pytest.raises(MissingConsumerGroup):
        embedding.embed_documents(["What is 2 + 2?"])
    with pytest.raises(MissingConsumerGroup):
        embedding.embed_query("What is 2 + 2?")

    # The same calls go through once a consumer group is named.
    embedding.embed_documents(["What is 2 + 2?"], "primary")
    embedding.embed_query("What is 2 + 2?", "primary")
@pytest.mark.requires("pytest_httpx")
@pytest.mark.requires("takeoff_client")
def test_takeoff_initialization(httpx_mock: Any) -> None:
    """Check that readers passed as ``models`` are created before embedding.

    Both readers share one consumer group, so the embedding calls are made
    without naming a group.
    """
    mgmt_port = 36452
    inference_port = 46253
    mgmt_url = f"http://localhost:{mgmt_port}/reader"
    embed_url = f"http://localhost:{inference_port}/embed"

    reader_1 = ReaderConfig(
        model_name="test",
        device=Device.cpu,
        consumer_group="embed",
    )
    reader_2 = ReaderConfig(
        model_name="test2",
        device=Device.cuda,
        consumer_group="embed",
    )

    httpx_mock.add_response(
        method="POST", url=mgmt_url, json={"key": "value"}, status_code=201
    )
    httpx_mock.add_response(
        method="POST",
        url=embed_url,
        json={"result": [0.34, 0.43, -0.934532]},
        status_code=200,
    )

    embedding = TitanTakeoffEmbed(
        port=inference_port, mgmt_port=mgmt_port, models=[reader_1, reader_2]
    )

    # Only one consumer group was configured at initialization, so it does not
    # have to be passed explicitly.
    documents_result = embedding.embed_documents(["What is 2 + 2?"])
    query_result = embedding.embed_query("What is 2 + 2?")
    assert isinstance(documents_result, list)
    assert isinstance(query_result, list)

    requests = httpx_mock.get_requests()
    assert len(requests) == 4

    # The first two requests go to the management API, one per reader and in
    # the order the readers were passed.
    for request, reader in zip(requests[:2], (reader_1, reader_2)):
        payload = json.loads(request.content)
        for key, value in reader.dict().items():
            assert payload[key] == value
        assert request.url == mgmt_url

    # The last two requests are the embedding calls to the inference endpoint.
    for request in requests[2:4]:
        assert request.url == embed_url
        assert json.loads(request.content)["text"] == "What is 2 + 2?"
@pytest.mark.requires("pytest_httpx")
@pytest.mark.requires("takeoff_client")
def test_takeoff_initialization_with_more_than_one_consumer_group(
    httpx_mock: Any,
) -> None:
    """Check that a consumer group must be named when readers use several groups."""
    mgmt_port = 36452
    inference_port = 46253
    mgmt_url = f"http://localhost:{mgmt_port}/reader"
    embed_url = f"http://localhost:{inference_port}/embed"

    reader_1 = ReaderConfig(
        model_name="test",
        device=Device.cpu,
        consumer_group="embed",
    )
    reader_2 = ReaderConfig(
        model_name="test2",
        device=Device.cuda,
        consumer_group="embed2",
    )

    httpx_mock.add_response(
        method="POST", url=mgmt_url, json={"key": "value"}, status_code=201
    )
    httpx_mock.add_response(
        method="POST",
        url=embed_url,
        json={"result": [0.34, 0.43, -0.934532]},
        status_code=200,
    )

    embedding = TitanTakeoffEmbed(
        port=inference_port, mgmt_port=mgmt_port, models=[reader_1, reader_2]
    )

    # Two different consumer groups were configured at initialization, so calls
    # that do not name one must be rejected.
    with pytest.raises(MissingConsumerGroup):
        embedding.embed_documents(["What is 2 + 2?"])
    with pytest.raises(MissingConsumerGroup):
        embedding.embed_query("What is 2 + 2?")

    documents_result = embedding.embed_documents(["What is 2 + 2?"], "embed")
    query_result = embedding.embed_query("What is 2 + 2?", "embed2")
    assert isinstance(documents_result, list)
    assert isinstance(query_result, list)

    requests = httpx_mock.get_requests()
    assert len(requests) == 4

    # The first two requests go to the management API, one per reader and in
    # the order the readers were passed.
    for request, reader in zip(requests[:2], (reader_1, reader_2)):
        payload = json.loads(request.content)
        for key, value in reader.dict().items():
            assert payload[key] == value
        assert request.url == mgmt_url

    # The last two requests are the embedding calls to the inference endpoint.
    for request in requests[2:4]:
        assert request.url == embed_url
        assert json.loads(request.content)["text"] == "What is 2 + 2?"