fix(fireworks): retry on bare APIConnectionError, default max_retries=2 (#37602)

`ChatFireworks` previously left `max_retries` at `None` (single attempt),
and the only SDK connection error it treated as retryable was
`APITimeoutError`. When the Fireworks edge drops the TCP connection
without sending an HTTP response, the SDK wraps `httpx.RequestError` as a
bare `APIConnectionError`, which slipped past the retry decorator.
Aligning the default with the Fireworks SDK and `langchain-openai`
(`max_retries=2`) and widening the retryable type from `APITimeoutError`
to its parent class `APIConnectionError` closes both gaps.
This commit is contained in:
Mason Daugherty
2026-05-21 01:19:21 -05:00
committed by GitHub
parent 9545d05882
commit 3c4cb503a0

View File

@@ -16,7 +16,7 @@ from typing import (
import httpx import httpx
from fireworks import ( from fireworks import (
APITimeoutError, APIConnectionError,
AsyncFireworks, AsyncFireworks,
BadRequestError, BadRequestError,
Fireworks, Fireworks,
@@ -420,7 +420,7 @@ class _RetryableHTTPStatusError(FireworksError):
_RETRYABLE_ERRORS: tuple[type[BaseException], ...] = ( _RETRYABLE_ERRORS: tuple[type[BaseException], ...] = (
APITimeoutError, APIConnectionError,
InternalServerError, InternalServerError,
RateLimitError, RateLimitError,
httpx.TimeoutException, httpx.TimeoutException,
@@ -466,10 +466,11 @@ def _create_retry_decorator(
LangChain `run_manager.on_retry` callback. The SDK's own retry layer is LangChain `run_manager.on_retry` callback. The SDK's own retry layer is
suppressed via `max_retries=0` on the client; see `validate_environment`. suppressed via `max_retries=0` on the client; see `validate_environment`.
""" """
# `max_retries` counts retries *after* the initial attempt. # `max_retries` counts retries *after* the initial attempt (default lives on
# `create_base_retry_decorator` forwards its `max_retries` to # the `ChatFireworks.max_retries` field). `create_base_retry_decorator`
# `stop_after_attempt`, which counts total attempts — so offset by 1. # forwards its `max_retries` to `stop_after_attempt`, which counts total
# `None` and `0` both mean "single attempt, no retries". # attempts — so offset by 1. `None` and `0` both mean "single attempt, no
# retries".
attempts = (llm.max_retries + 1) if llm.max_retries else 1 attempts = (llm.max_retries + 1) if llm.max_retries else 1
return create_base_retry_decorator( return create_base_retry_decorator(
error_types=list(_RETRYABLE_ERRORS), error_types=list(_RETRYABLE_ERRORS),
@@ -725,13 +726,14 @@ class ChatFireworks(BaseChatModel):
max_tokens: int | None = None max_tokens: int | None = None
"""Maximum number of tokens to generate.""" """Maximum number of tokens to generate."""
max_retries: int | None = None max_retries: int | None = 2
"""Maximum number of retries after the initial attempt when generating. """Maximum number of retries after the initial attempt when generating.
Retries use exponential backoff and trigger on transient errors: Retries use exponential backoff and trigger on transient errors:
`RateLimitError`, `APITimeoutError`, 5xx responses (including those that `RateLimitError`, `APIConnectionError` (including its `APITimeoutError`
surface as `httpx.HTTPStatusError` rather than typed SDK errors), and subclass), 5xx responses (including those that surface as
underlying transport errors (`httpx.TimeoutException`, `httpx.TransportError`). `httpx.HTTPStatusError` rather than typed SDK errors), and underlying
transport errors (`httpx.TimeoutException`, `httpx.TransportError`).
A value of `None` or `0` disables retries. A value of `None` or `0` disables retries.
""" """