mirror of
https://github.com/hwchase17/langchain.git
synced 2026-06-09 10:17:00 +00:00
fix(fireworks): retry on bare APIConnectionError, default max_retries=2 (#37602)
`ChatFireworks` previously left `max_retries` at `None` (single attempt), and among the SDK's connection errors it retried only `APITimeoutError`. When the Fireworks edge drops the TCP connection without sending an HTTP response, the SDK wraps the `httpx.RequestError` as a bare `APIConnectionError`, which slipped past the retry decorator. This commit closes both gaps: it aligns the default with the Fireworks SDK and `langchain-openai` (`max_retries=2`), and it broadens the retryable error from `APITimeoutError` to its parent class `APIConnectionError`.
This commit is contained in:
@@ -16,7 +16,7 @@ from typing import (
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from fireworks import (
|
from fireworks import (
|
||||||
APITimeoutError,
|
APIConnectionError,
|
||||||
AsyncFireworks,
|
AsyncFireworks,
|
||||||
BadRequestError,
|
BadRequestError,
|
||||||
Fireworks,
|
Fireworks,
|
||||||
@@ -420,7 +420,7 @@ class _RetryableHTTPStatusError(FireworksError):
|
|||||||
|
|
||||||
|
|
||||||
_RETRYABLE_ERRORS: tuple[type[BaseException], ...] = (
|
_RETRYABLE_ERRORS: tuple[type[BaseException], ...] = (
|
||||||
APITimeoutError,
|
APIConnectionError,
|
||||||
InternalServerError,
|
InternalServerError,
|
||||||
RateLimitError,
|
RateLimitError,
|
||||||
httpx.TimeoutException,
|
httpx.TimeoutException,
|
||||||
@@ -466,10 +466,11 @@ def _create_retry_decorator(
|
|||||||
LangChain `run_manager.on_retry` callback. The SDK's own retry layer is
|
LangChain `run_manager.on_retry` callback. The SDK's own retry layer is
|
||||||
suppressed via `max_retries=0` on the client; see `validate_environment`.
|
suppressed via `max_retries=0` on the client; see `validate_environment`.
|
||||||
"""
|
"""
|
||||||
# `max_retries` counts retries *after* the initial attempt.
|
# `max_retries` counts retries *after* the initial attempt (default lives on
|
||||||
# `create_base_retry_decorator` forwards its `max_retries` to
|
# the `ChatFireworks.max_retries` field). `create_base_retry_decorator`
|
||||||
# `stop_after_attempt`, which counts total attempts — so offset by 1.
|
# forwards its `max_retries` to `stop_after_attempt`, which counts total
|
||||||
# `None` and `0` both mean "single attempt, no retries".
|
# attempts — so offset by 1. `None` and `0` both mean "single attempt, no
|
||||||
|
# retries".
|
||||||
attempts = (llm.max_retries + 1) if llm.max_retries else 1
|
attempts = (llm.max_retries + 1) if llm.max_retries else 1
|
||||||
return create_base_retry_decorator(
|
return create_base_retry_decorator(
|
||||||
error_types=list(_RETRYABLE_ERRORS),
|
error_types=list(_RETRYABLE_ERRORS),
|
||||||
@@ -725,13 +726,14 @@ class ChatFireworks(BaseChatModel):
|
|||||||
max_tokens: int | None = None
|
max_tokens: int | None = None
|
||||||
"""Maximum number of tokens to generate."""
|
"""Maximum number of tokens to generate."""
|
||||||
|
|
||||||
max_retries: int | None = None
|
max_retries: int | None = 2
|
||||||
"""Maximum number of retries after the initial attempt when generating.
|
"""Maximum number of retries after the initial attempt when generating.
|
||||||
|
|
||||||
Retries use exponential backoff and trigger on transient errors:
|
Retries use exponential backoff and trigger on transient errors:
|
||||||
`RateLimitError`, `APITimeoutError`, 5xx responses (including those that
|
`RateLimitError`, `APIConnectionError` (including its `APITimeoutError`
|
||||||
surface as `httpx.HTTPStatusError` rather than typed SDK errors), and
|
subclass), 5xx responses (including those that surface as
|
||||||
underlying transport errors (`httpx.TimeoutException`, `httpx.TransportError`).
|
`httpx.HTTPStatusError` rather than typed SDK errors), and underlying
|
||||||
|
transport errors (`httpx.TimeoutException`, `httpx.TransportError`).
|
||||||
A value of `None` or `0` disables retries.
|
A value of `None` or `0` disables retries.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user