mistral[minor]: Add a retry mechanism for request rate limit errors in MistralAIEmbeddings (#27818)
- **Description:** In the event of a rate limit error from the MistralAI server, parsing the response JSON raises a KeyError. To address this, a simple retry mechanism has been implemented for requests that exceed the rate limit.
- **Issue:** #27790

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
parent df5008fe55 · commit a37afbe353
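For context, here is a minimal standalone sketch of the tenacity retry pattern this commit applies. The endpoint URL, payload shape, and function name are illustrative placeholders, not the real Mistral API surface; the real client posts to the `/embeddings` route shown in the diff below.

```python
import httpx
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed

# Placeholder endpoint for illustration only.
EXAMPLE_URL = "https://api.example.com/embeddings"

@retry(
    retry=retry_if_exception_type(httpx.TimeoutException),  # retry only on timeouts
    wait=wait_fixed(30),         # fixed 30s pause between attempts (wait_time default)
    stop=stop_after_attempt(5),  # give up after 5 attempts (max_retries default)
)
def post_with_retry(client: httpx.Client, payload: dict) -> httpx.Response:
    response = client.post(EXAMPLE_URL, json=payload)
    response.raise_for_status()  # fail loudly on 4xx/5xx instead of a later KeyError
    return response
```

Note that `retry_if_exception_type(httpx.TimeoutException)` scopes the retries to timeouts, while `raise_for_status()` turns an error response such as a 429 into an explicit `httpx.HTTPStatusError` rather than letting the missing `"data"` key surface as a `KeyError` during JSON parsing.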
```diff
@@ -4,6 +4,7 @@ import warnings
 from typing import Iterable, List
 
 import httpx
+from httpx import Response
 from langchain_core.embeddings import Embeddings
 from langchain_core.utils import (
     secret_from_env,
@@ -15,6 +16,7 @@ from pydantic import (
     SecretStr,
     model_validator,
 )
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
 from tokenizers import Tokenizer  # type: ignore
 from typing_extensions import Self
 
@@ -58,6 +60,8 @@ class MistralAIEmbeddings(BaseModel, Embeddings):
         The number of times to retry a request if it fails.
     timeout: int
         The number of seconds to wait for a response before timing out.
+    wait_time: int
+        The number of seconds to wait before retrying a request in case of 429 error.
     max_concurrent_requests: int
         The maximum number of concurrent requests to make to the Mistral API.
 
@@ -128,6 +132,7 @@ class MistralAIEmbeddings(BaseModel, Embeddings):
     endpoint: str = "https://api.mistral.ai/v1/"
     max_retries: int = 5
     timeout: int = 120
+    wait_time: int = 30
     max_concurrent_requests: int = 64
     tokenizer: Tokenizer = Field(default=None)
 
@@ -215,16 +220,26 @@ class MistralAIEmbeddings(BaseModel, Embeddings):
             List of embeddings, one for each text.
         """
         try:
-            batch_responses = (
-                self.client.post(
+            batch_responses = []
+
+            @retry(
+                retry=retry_if_exception_type(httpx.TimeoutException),
+                wait=wait_fixed(self.wait_time),
+                stop=stop_after_attempt(self.max_retries),
+            )
+            def _embed_batch(batch: List[str]) -> Response:
+                response = self.client.post(
                     url="/embeddings",
                     json=dict(
                         model=self.model,
                         input=batch,
                     ),
                 )
-                for batch in self._get_batches(texts)
-            )
+                response.raise_for_status()
+                return response
+
+            for batch in self._get_batches(texts):
+                batch_responses.append(_embed_batch(batch))
             return [
                 list(map(float, embedding_obj["embedding"]))
                 for response in batch_responses
```
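With the new `wait_time` field in place alongside `max_retries`, callers can tune the retry behavior at construction time. A usage sketch, assuming the `langchain_mistralai` package is installed; the API key value is a placeholder:

```python
from langchain_mistralai import MistralAIEmbeddings

embeddings = MistralAIEmbeddings(
    model="mistral-embed",
    api_key="YOUR_API_KEY",  # placeholder, supply a real key
    max_retries=5,  # attempts before giving up
    wait_time=30,   # seconds to wait between retries
)
vectors = embeddings.embed_documents(["LangChain", "MistralAI"])
```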