chore(standard-tests): select ALL rules with exclusions (#31937)

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Christophe Bornet
2025-09-08 16:57:47 +02:00
committed by GitHub
parent 323729915a
commit c672590f42
12 changed files with 83 additions and 42 deletions

View File

@@ -1,15 +1,13 @@
"""Standard tests."""
from abc import ABC
class BaseStandardTests(ABC):
class BaseStandardTests:
"""Base class for standard tests.
:private:
"""
def test_no_overrides_DO_NOT_OVERRIDE(self) -> None:
def test_no_overrides_DO_NOT_OVERRIDE(self) -> None: # noqa: N802
"""Test that no standard tests are overridden.
:private:

View File

@@ -3,7 +3,7 @@
import gzip
from os import PathLike
from pathlib import Path
from typing import Any, Union
from typing import Any, Union, cast
import pytest
import yaml
@@ -27,7 +27,13 @@ class CustomSerializer:
def serialize(cassette_dict: dict) -> bytes:
"""Convert cassette to YAML and compress it."""
cassette_dict["requests"] = [
request._to_dict() for request in cassette_dict["requests"]
{
"method": request.method,
"uri": request.uri,
"body": request.body,
"headers": {k: [v] for k, v in request.headers.items()},
}
for request in cassette_dict["requests"]
]
yml = yaml.safe_dump(cassette_dict)
return gzip.compress(yml.encode("utf-8"))
@@ -35,11 +41,9 @@ class CustomSerializer:
@staticmethod
def deserialize(data: bytes) -> dict:
"""Decompress data and convert it from YAML."""
text = gzip.decompress(data).decode("utf-8")
cassette: dict[str, Any] = yaml.safe_load(text)
cassette["requests"] = [
Request._from_dict(request) for request in cassette["requests"]
]
decoded_yaml = gzip.decompress(data).decode("utf-8")
cassette = cast("dict[str, Any]", yaml.safe_load(decoded_yaml))
cassette["requests"] = [Request(**request) for request in cassette["requests"]]
return cassette
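
A quick round-trip may help when reading this hunk. The sketch below is illustrative only: the cassette contents and variable names are invented, and it assumes vcrpy's Request(method, uri, body, headers) constructor, which is what deserialize() relies on via Request(**request).

from vcr.request import Request

# Minimal in-memory cassette in the shape vcrpy hands to a serializer:
# live Request objects plus plain-data responses (left empty here).
cassette = {
    "requests": [
        Request(
            method="GET",
            uri="https://example.com/v1/models",  # illustrative URI
            body=None,
            headers={"accept": "application/json"},
        )
    ],
    "responses": [],
}

# serialize() flattens each Request into a plain dict, YAML-dumps the
# cassette, and gzip-compresses it; pass a shallow copy because the
# "requests" key is reassigned on the dict it receives.
blob = CustomSerializer.serialize(dict(cassette))

# deserialize() reverses both steps and rebuilds Request objects.
restored = CustomSerializer.deserialize(blob)
assert restored["requests"][0].uri == "https://example.com/v1/models"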

View File

@@ -30,14 +30,14 @@ from pydantic import BaseModel, Field
from pydantic.v1 import BaseModel as BaseModelV1
from pydantic.v1 import Field as FieldV1
from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped]
from typing_extensions import TypedDict
from typing_extensions import TypedDict, override
from vcr.cassette import Cassette
from langchain_tests.unit_tests.chat_models import ChatModelTests
from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION
def _get_joke_class(
def _get_joke_class( # noqa: RET503
schema_type: Literal["pydantic", "typeddict", "json_schema"],
) -> Any:
class Joke(BaseModel):
@@ -56,7 +56,7 @@ def _get_joke_class(
punchline: Annotated[str, ..., "answer to resolve the joke"]
def validate_joke_dict(result: Any) -> bool:
return all(key in ["setup", "punchline"] for key in result)
return all(key in {"setup", "punchline"} for key in result)
if schema_type == "pydantic":
return Joke, validate_joke
@@ -75,6 +75,7 @@ class _TestCallbackHandler(BaseCallbackHandler):
super().__init__()
self.options = []
@override
def on_chat_model_start(
self,
serialized: Any,
@@ -1042,7 +1043,7 @@ class ChatModelIntegrationTests(ChatModelTests):
# Needed for langchain_core.callbacks.usage
model_name = result.response_metadata.get("model_name")
assert isinstance(model_name, str)
assert model_name != "", "model_name is empty"
assert model_name, "model_name is empty"
# `input_tokens` is the total, possibly including other unclassified or
# system-level tokens.
@@ -1056,10 +1057,9 @@ class ChatModelIntegrationTests(ChatModelTests):
) is not None
assert isinstance(input_token_details.get("audio"), int)
# Asserts that total input tokens are at least the sum of the token counts
total_detailed_tokens = sum(
assert usage_metadata.get("input_tokens", 0) >= sum(
v for v in input_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens
if "audio_output" in self.supported_usage_metadata_details["invoke"]:
msg = self.invoke_with_audio_output()
assert (usage_metadata := msg.usage_metadata) is not None
@@ -1068,10 +1068,9 @@ class ChatModelIntegrationTests(ChatModelTests):
) is not None
assert isinstance(output_token_details.get("audio"), int)
# Asserts that total output tokens are at least the sum of the token counts
total_detailed_tokens = sum(
assert usage_metadata.get("output_tokens", 0) >= sum(
v for v in output_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens
if "reasoning_output" in self.supported_usage_metadata_details["invoke"]:
msg = self.invoke_with_reasoning_output()
assert (usage_metadata := msg.usage_metadata) is not None
@@ -1080,10 +1079,9 @@ class ChatModelIntegrationTests(ChatModelTests):
) is not None
assert isinstance(output_token_details.get("reasoning"), int)
# Asserts that total output tokens are at least the sum of the token counts
total_detailed_tokens = sum(
assert usage_metadata.get("output_tokens", 0) >= sum(
v for v in output_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens
if "cache_read_input" in self.supported_usage_metadata_details["invoke"]:
msg = self.invoke_with_cache_read_input()
assert (usage_metadata := msg.usage_metadata) is not None
@@ -1092,10 +1090,9 @@ class ChatModelIntegrationTests(ChatModelTests):
) is not None
assert isinstance(input_token_details.get("cache_read"), int)
# Asserts that total input tokens are at least the sum of the token counts
total_detailed_tokens = sum(
assert usage_metadata.get("input_tokens", 0) >= sum(
v for v in input_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens
if "cache_creation_input" in self.supported_usage_metadata_details["invoke"]:
msg = self.invoke_with_cache_creation_input()
assert (usage_metadata := msg.usage_metadata) is not None
@@ -1104,10 +1101,9 @@ class ChatModelIntegrationTests(ChatModelTests):
) is not None
assert isinstance(input_token_details.get("cache_creation"), int)
# Asserts that total input tokens are at least the sum of the token counts
total_detailed_tokens = sum(
assert usage_metadata.get("input_tokens", 0) >= sum(
v for v in input_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens
def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
"""Test usage metadata in streaming mode.
@@ -1235,7 +1231,7 @@ class ChatModelIntegrationTests(ChatModelTests):
# Needed for langchain_core.callbacks.usage
model_name = full.response_metadata.get("model_name")
assert isinstance(model_name, str)
assert model_name != "", "model_name is empty"
assert model_name, "model_name is empty"
if "audio_input" in self.supported_usage_metadata_details["stream"]:
msg = self.invoke_with_audio_input(stream=True)
@@ -1720,7 +1716,7 @@ class ChatModelIntegrationTests(ChatModelTests):
pytest.skip("Test requires tool choice.")
@tool
def get_weather(location: str) -> str:
def get_weather(location: str) -> str: # noqa: ARG001
"""Get weather at a location."""
return "It's sunny."
@@ -2130,7 +2126,7 @@ class ChatModelIntegrationTests(ChatModelTests):
See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
of ``with_structured_output``.
""" # noqa: E501
"""
if not self.has_structured_output:
pytest.skip("Test requires structured output.")
@@ -2262,8 +2258,8 @@ class ChatModelIntegrationTests(ChatModelTests):
if not self.supports_json_mode:
pytest.skip("Test requires json mode support.")
from pydantic import BaseModel as BaseModelProper
from pydantic import Field as FieldProper
from pydantic import BaseModel as BaseModelProper # noqa: PLC0415
from pydantic import Field as FieldProper # noqa: PLC0415
class Joke(BaseModelProper):
"""Joke to tell user."""
@@ -2912,7 +2908,7 @@ class ChatModelIntegrationTests(ChatModelTests):
pytest.skip("Test requires tool calling.")
@tool
def get_weather(location: str) -> str:
def get_weather(location: str) -> str: # noqa: ARG001
"""Call to surf the web."""
return "It's sunny."

View File

@@ -175,7 +175,7 @@ class DocumentIndexerTestSuite(ABC):
def test_delete_no_args(self, index: DocumentIndex) -> None:
"""Test delete with no args raises ValueError."""
with pytest.raises(ValueError):
with pytest.raises(ValueError): # noqa: PT011
index.delete()
def test_delete_missing_content(self, index: DocumentIndex) -> None:
@@ -367,7 +367,7 @@ class AsyncDocumentIndexTestSuite(ABC):
async def test_delete_no_args(self, index: DocumentIndex) -> None:
"""Test delete with no args raises ValueError."""
with pytest.raises(ValueError):
with pytest.raises(ValueError): # noqa: PT011
await index.adelete()
async def test_delete_missing_content(self, index: DocumentIndex) -> None:

View File

@@ -7,7 +7,7 @@
def get_pydantic_major_version() -> int:
"""Get the major version of Pydantic."""
try:
import pydantic
import pydantic # noqa: PLC0415
return int(pydantic.__version__.split(".")[0])
except ImportError:

View File

@@ -59,14 +59,45 @@ ignore_missing_imports = true
target-version = "py39"
[tool.ruff.lint]
select = ["D", "E", "F", "I", "PGH", "T201", "UP",]
pyupgrade.keep-runtime-typing = true
select = [ "ALL",]
ignore = [
"C90", # McCabe complexity
"COM812", # Messes with the formatter
"FA100", # Can't activate since we exclude UP007 for now
"FIX002", # Line contains TODO
"ISC001", # Messes with the formatter
"PERF203", # Rarely useful
"PLR2004", # Magic numbers
"PLR09", # Too many something (arg, statements, etc)
"RUF012", # Doesn't play well with Pydantic
"S101", # Asserts allowed in tests
"S311", # No need for strong crypto in tests
"SLF001", # Tests may call private methods
"TC001", # Doesn't play well with Pydantic
"TC002", # Doesn't play well with Pydantic
"TC003", # Doesn't play well with Pydantic
"TD002", # Missing author in TODO
"TD003", # Missing issue link in TODO
[tool.ruff.lint.pydocstyle]
convention = "google"
# TODO rules
"ANN401",
"BLE",
]
unfixable = [
"B028", # People should intentionally tune the stacklevel
"PLW1510", # People should intentionally set the check argument
]
flake8-annotations.allow-star-arg-any = true
flake8-annotations.mypy-init-return = true
flake8-type-checking.runtime-evaluated-base-classes = ["pydantic.BaseModel","langchain_core.load.serializable.Serializable","langchain_core.runnables.base.RunnableSerializable"]
pep8-naming.classmethod-decorators = [ "classmethod", "langchain_core.utils.pydantic.pre_init", "pydantic.field_validator", "pydantic.v1.root_validator",]
pydocstyle.convention = "google"
pyupgrade.keep-runtime-typing = true
[tool.ruff.lint.per-file-ignores]
"tests/**" = [ "D1",]
"scripts/**" = [ "INP",]
[tool.coverage.run]
omit = ["tests/*"]

View File

@@ -7,6 +7,7 @@ from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
from langchain_core.messages.ai import UsageMetadata
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from pydantic import Field
from typing_extensions import override
class ChatParrotLink(BaseChatModel):
@@ -41,6 +42,7 @@ class ChatParrotLink(BaseChatModel):
stop: Optional[list[str]] = None
max_retries: int = 2
@override
def _generate(
self,
messages: list[BaseMessage],
@@ -92,6 +94,7 @@ class ChatParrotLink(BaseChatModel):
generation = ChatGeneration(message=message)
return ChatResult(generations=[generation])
@override
def _stream(
self,
messages: list[BaseMessage],

View File

@@ -1,6 +1,7 @@
from typing import Literal
from langchain_core.tools import BaseTool
from typing_extensions import override
from langchain_tests.integration_tests import ToolsIntegrationTests
from langchain_tests.unit_tests import ToolsUnitTests
@@ -12,6 +13,7 @@ class ParrotMultiplyTool(BaseTool):
"Multiply two numbers like a parrot. Parrots always add eighty for their matey."
)
@override
def _run(self, a: int, b: int) -> int:
return a * b + 80
@@ -23,6 +25,7 @@ class ParrotMultiplyArtifactTool(BaseTool):
)
response_format: Literal["content_and_artifact"] = "content_and_artifact"
@override
def _run(self, a: int, b: int) -> tuple[int, str]:
return a * b + 80, "parrot artifact"

View File

@@ -1,9 +1,8 @@
"""Test the standard tests on the custom chat model in the docs."""
from typing import Optional
import pytest
from langchain_core.language_models.chat_models import BaseChatModel
from typing_extensions import Any
from langchain_tests.integration_tests import ChatModelIntegrationTests
from langchain_tests.unit_tests import ChatModelUnitTests
@@ -34,7 +33,6 @@ class TestChatParrotLinkIntegration(ChatModelIntegrationTests):
def test_unicode_tool_call_integration(
self,
model: BaseChatModel,
tool_choice: Optional[str] = None,
force_tool_call: bool = True,
**_: Any,
) -> None:
"""Expected failure as ChatParrotLink doesn't support tool calling yet."""

View File

@@ -2,6 +2,7 @@
import pytest
from langchain_core.stores import InMemoryStore
from typing_extensions import override
from langchain_tests.integration_tests.base_store import (
BaseStoreAsyncTests,
@@ -11,19 +12,23 @@ from langchain_tests.integration_tests.base_store import (
class TestInMemoryStore(BaseStoreSyncTests[str]):
@pytest.fixture
@override
def three_values(self) -> tuple[str, str, str]:
return "foo", "bar", "buzz"
@pytest.fixture
@override
def kv_store(self) -> InMemoryStore:
return InMemoryStore()
class TestInMemoryStoreAsync(BaseStoreAsyncTests[str]):
@pytest.fixture
@override
def three_values(self) -> tuple[str, str, str]:
return "foo", "bar", "buzz"
@pytest.fixture
@override
async def kv_store(self) -> InMemoryStore:
return InMemoryStore()

View File

@@ -1,5 +1,6 @@
import pytest
from langchain_core.caches import InMemoryCache
from typing_extensions import override
from langchain_tests.integration_tests.cache import (
AsyncCacheTestSuite,
@@ -9,11 +10,13 @@ from langchain_tests.integration_tests.cache import (
class TestInMemoryCache(SyncCacheTestSuite):
@pytest.fixture
@override
def cache(self) -> InMemoryCache:
return InMemoryCache()
class TestInMemoryCacheAsync(AsyncCacheTestSuite):
@pytest.fixture
@override
async def cache(self) -> InMemoryCache:
return InMemoryCache()