test(mistralai): stabilize integration tests with rate limiting and retries (#37588)

Mistral integration tests have been flaky against the live API. This
adds a shared, xdist-aware rate limiter and a global retry policy so
transient 429s no longer fail the suite.
This commit is contained in:
Mason Daugherty
2026-05-20 19:49:25 -05:00
committed by GitHub
parent 5197dd5985
commit 4d2efcd756
6 changed files with 51 additions and 7 deletions

View File

@@ -0,0 +1,18 @@
"""Shared rate limiter for Mistral integration tests.
Scaled by ``PYTEST_XDIST_WORKER_COUNT`` so aggregate QPS across all xdist
workers stays bounded near the target rate.
"""
from __future__ import annotations
import os
from langchain_core.rate_limiters import InMemoryRateLimiter
# Desired combined request rate across every test worker.
_TARGET_REQUESTS_PER_SECOND = 0.5

# Worker count as reported through the environment. Falls back to 1 when the
# variable is unset and is clamped to at least 1 so the division below can
# never divide by zero or produce a negative rate.
_WORKER_COUNT = max(1, int(os.getenv("PYTEST_XDIST_WORKER_COUNT", "1")))

# Each worker builds its own limiter, so give it an equal share of the target.
_PER_WORKER_REQUESTS_PER_SECOND = _TARGET_REQUESTS_PER_SECOND / _WORKER_COUNT

rate_limiter = InMemoryRateLimiter(
    requests_per_second=_PER_WORKER_REQUESTS_PER_SECOND,
)

View File

@@ -13,11 +13,12 @@ from pydantic import BaseModel
from typing_extensions import TypedDict
from langchain_mistralai.chat_models import ChatMistralAI
from tests.integration_tests._rate_limiter import rate_limiter
async def test_astream() -> None:
"""Test streaming tokens from ChatMistralAI."""
llm = ChatMistralAI()
llm = ChatMistralAI(rate_limiter=rate_limiter)
full: BaseMessageChunk | None = None
chunks_with_token_counts = 0
@@ -70,7 +71,7 @@ def _check_parsed_result(result: Any, schema: Any) -> None:
@pytest.mark.parametrize("schema", [Book, BookDict, Book.model_json_schema()])
def test_structured_output_json_schema(schema: Any) -> None:
llm = ChatMistralAI(model="ministral-8b-latest") # type: ignore[call-arg]
llm = ChatMistralAI(model="ministral-8b-latest", rate_limiter=rate_limiter) # type: ignore[call-arg]
structured_llm = llm.with_structured_output(schema, method="json_schema")
messages = [
@@ -91,7 +92,7 @@ def test_structured_output_json_schema(schema: Any) -> None:
@pytest.mark.parametrize("schema", [Book, BookDict, Book.model_json_schema()])
async def test_structured_output_json_schema_async(schema: Any) -> None:
llm = ChatMistralAI(model="ministral-8b-latest") # type: ignore[call-arg]
llm = ChatMistralAI(model="ministral-8b-latest", rate_limiter=rate_limiter) # type: ignore[call-arg]
structured_llm = llm.with_structured_output(schema, method="json_schema")
messages = [
@@ -116,6 +117,7 @@ def test_retry_parameters(caplog: pytest.LogCaptureFixture) -> None:
mistral = ChatMistralAI(
timeout=1, # Very short timeout to trigger timeouts
max_retries=3, # Should retry 3 times
rate_limiter=rate_limiter,
)
# Simple test input that should take longer than 1 second to process
@@ -148,7 +150,7 @@ def test_retry_parameters(caplog: pytest.LogCaptureFixture) -> None:
def test_reasoning() -> None:
model = ChatMistralAI(model="magistral-medium-latest") # type: ignore[call-arg]
model = ChatMistralAI(model="magistral-medium-latest", rate_limiter=rate_limiter) # type: ignore[call-arg]
input_message = {
"role": "user",
"content": "Hello, my name is Bob.",
@@ -172,7 +174,11 @@ def test_reasoning() -> None:
def test_reasoning_v1() -> None:
model = ChatMistralAI(model="magistral-medium-latest", output_version="v1") # type: ignore[call-arg]
model = ChatMistralAI( # type: ignore[call-arg]
model="magistral-medium-latest",
output_version="v1",
rate_limiter=rate_limiter,
)
input_message = {
"role": "user",
"content": "Hello, my name is Bob.",

View File

@@ -7,6 +7,7 @@ from langchain_tests.integration_tests import ( # type: ignore[import-not-found
)
from langchain_mistralai import ChatMistralAI
from tests.integration_tests._rate_limiter import rate_limiter
class TestMistralStandard(ChatModelIntegrationTests):
@@ -16,7 +17,11 @@ class TestMistralStandard(ChatModelIntegrationTests):
@property
def chat_model_params(self) -> dict:
return {"model": "mistral-large-latest", "temperature": 0}
return {
"model": "mistral-large-latest",
"temperature": 0,
"rate_limiter": rate_limiter,
}
@property
def supports_json_mode(self) -> bool: