Files
langchain/libs/partners/mistralai/tests/integration_tests/test_embeddings.py
noeliecherrier 08bb74f148 fix(mistralai): handle HTTP errors in async embed documents (#33187)
The async embed function does not properly handle HTTP errors.

For instance with large batches, Mistral AI returns `Too many inputs in
request, split into more batches.` in a 400 error.

This leads to a `KeyError` at `response.json()["data"]` (line 288).

This PR fixes the issue by:
- calling `response.raise_for_status()` before returning
- adding a retry similarly to what is done in the synchronous
counterpart `embed_documents`

I also added an integration test, but I am willing to move it to the unit
tests if that is more relevant.
2025-10-01 10:57:47 -04:00

76 lines
2.3 KiB
Python

"""Test MistralAI Embedding."""
from unittest.mock import patch
import httpx
import pytest
import tenacity
from langchain_mistralai import MistralAIEmbeddings
def test_mistralai_embedding_documents() -> None:
    """Embed two short documents synchronously and check the result shape."""
    texts = ["foo bar", "test document"]
    vectors = MistralAIEmbeddings().embed_documents(texts)
    # One vector per input document, each of length 1024.
    assert len(vectors) == 2
    assert len(vectors[0]) == 1024
def test_mistralai_embedding_query() -> None:
    """Embed a single query string synchronously and check its length."""
    query = "foo bar"
    vector = MistralAIEmbeddings().embed_query(query)
    assert len(vector) == 1024
async def test_mistralai_embedding_documents_async() -> None:
    """Embed two short documents via the async API and check the result shape."""
    texts = ["foo bar", "test document"]
    vectors = await MistralAIEmbeddings().aembed_documents(texts)
    # One vector per input document, each of length 1024.
    assert len(vectors) == 2
    assert len(vectors[0]) == 1024
async def test_mistralai_embedding_documents_http_error_async() -> None:
    """Check that an HTTP 400 from the async client surfaces as a RetryError.

    The async client's ``post`` is patched to return a canned 400 response,
    and ``aembed_documents`` is expected to raise ``tenacity.RetryError``.
    """
    texts = ["foo bar", "test document"]
    embedding = MistralAIEmbeddings(max_retries=0)

    # Build the fake error response; httpx needs a request attached to it.
    fake_request = httpx.Request("POST", url=embedding.async_client.base_url)
    bad_request_response = httpx.Response(status_code=400, request=fake_request)

    with (
        patch.object(
            embedding.async_client, "post", return_value=bad_request_response
        ),
        pytest.raises(tenacity.RetryError),
    ):
        await embedding.aembed_documents(texts)
async def test_mistralai_embedding_query_async() -> None:
    """Embed a single query string via the async API and check its length."""
    query = "foo bar"
    vector = await MistralAIEmbeddings().aembed_query(query)
    assert len(vector) == 1024
def test_mistralai_embedding_documents_long() -> None:
    """Embed ten long documents synchronously and check the result shape."""
    # Two long texts (each a phrase repeated 1000 times), repeated five times.
    long_texts = ["foo bar " * 1000, "test document " * 1000] * 5
    vectors = MistralAIEmbeddings().embed_documents(long_texts)
    assert len(vectors) == 10
    assert len(vectors[0]) == 1024
def test_mistralai_embed_query_character() -> None:
    """Embed a query consisting of a single emoji and check its length."""
    emoji_query = "😳"
    vector = MistralAIEmbeddings().embed_query(emoji_query)
    assert len(vector) == 1024