From 3c4cb503a069ca1b358cc240c4308b82610ae55e Mon Sep 17 00:00:00 2001
From: Mason Daugherty <mason@langchain.dev>
Date: Thu, 21 May 2026 01:19:21 -0500
Subject: [PATCH] fix(fireworks): retry on bare `APIConnectionError`, default
 `max_retries=2` (#37602)

`ChatFireworks` previously left `max_retries` at `None` (single attempt)
and only retried `APITimeoutError`. When the Fireworks edge drops TCP
without an HTTP response, the SDK wraps `httpx.RequestError` as a bare
`APIConnectionError`, which slipped past the retry decorator. Aligning
the default with the Fireworks SDK and `langchain-openai`
(`max_retries=2`) and broadening the retryable parent class closes both
gaps.
---
 .../langchain_fireworks/chat_models.py        | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/libs/partners/fireworks/langchain_fireworks/chat_models.py b/libs/partners/fireworks/langchain_fireworks/chat_models.py
index 446f0ce28ca..0fd4c425bc1 100644
--- a/libs/partners/fireworks/langchain_fireworks/chat_models.py
+++ b/libs/partners/fireworks/langchain_fireworks/chat_models.py
@@ -16,7 +16,7 @@ from typing import (
 
 import httpx
 from fireworks import (
-    APITimeoutError,
+    APIConnectionError,
     AsyncFireworks,
     BadRequestError,
     Fireworks,
@@ -420,7 +420,7 @@ class _RetryableHTTPStatusError(FireworksError):
 
 
 _RETRYABLE_ERRORS: tuple[type[BaseException], ...] = (
-    APITimeoutError,
+    APIConnectionError,
     InternalServerError,
     RateLimitError,
     httpx.TimeoutException,
@@ -466,10 +466,11 @@ def _create_retry_decorator(
     LangChain `run_manager.on_retry` callback. The SDK's own retry layer is
     suppressed via `max_retries=0` on the client; see `validate_environment`.
     """
-    # `max_retries` counts retries *after* the initial attempt.
-    # `create_base_retry_decorator` forwards its `max_retries` to
-    # `stop_after_attempt`, which counts total attempts — so offset by 1.
-    # `None` and `0` both mean "single attempt, no retries".
+    # `max_retries` counts retries *after* the initial attempt (default lives on
+    # the `ChatFireworks.max_retries` field). `create_base_retry_decorator`
+    # forwards its `max_retries` to `stop_after_attempt`, which counts total
+    # attempts — so offset by 1. `None` and `0` both mean "single attempt, no
+    # retries".
     attempts = (llm.max_retries + 1) if llm.max_retries else 1
     return create_base_retry_decorator(
         error_types=list(_RETRYABLE_ERRORS),
@@ -725,13 +726,14 @@ class ChatFireworks(BaseChatModel):
     max_tokens: int | None = None
     """Maximum number of tokens to generate."""
 
-    max_retries: int | None = None
+    max_retries: int | None = 2
     """Maximum number of retries after the initial attempt when generating.
 
     Retries use exponential backoff and trigger on transient errors:
-    `RateLimitError`, `APITimeoutError`, 5xx responses (including those that
-    surface as `httpx.HTTPStatusError` rather than typed SDK errors), and
-    underlying transport errors (`httpx.TimeoutException`, `httpx.TransportError`).
+    `RateLimitError`, `APIConnectionError` (including its `APITimeoutError`
+    subclass), 5xx responses (including those that surface as
+    `httpx.HTTPStatusError` rather than typed SDK errors), and underlying
+    transport errors (`httpx.TimeoutException`, `httpx.TransportError`).
     A value of `None` or `0` disables retries.
     """