From 933adb0c95140bf310f6ae47f2b7589b16d17203 Mon Sep 17 00:00:00 2001
From: Mason Daugherty <mason@langchain.dev>
Date: Mon, 29 Jun 2026 00:13:48 -0400
Subject: [PATCH] test(fireworks): cover request-level extra headers (#38518)

Fireworks chat users can pass request-specific headers to the SDK, but
the integration did not have targeted coverage or examples for
session-affinity and multi-turn headers. This adds explicit coverage for
sync, async, and streaming calls, and documents the supported invocation
patterns.

## Changes

- Documented `ChatFireworks` request-level `extra_headers` examples for
session affinity and multi-turn sessions, plus the SDK-level
`prompt_cache_key` alternative.
- Added `TestExtraHeaders` coverage showing `extra_headers` reach
top-level SDK kwargs for sync and streaming calls rather than being
folded into `extra_body`.
- Covered the async `ainvoke` path so request-specific headers are
verified across the main call modes.
---
 .../langchain_fireworks/chat_models.py        | 25 +++++++-
 .../tests/unit_tests/test_chat_models.py      | 61 +++++++++++++++++++
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/libs/partners/fireworks/langchain_fireworks/chat_models.py b/libs/partners/fireworks/langchain_fireworks/chat_models.py
index 79440a6af78..7a4662c66ec 100644
--- a/libs/partners/fireworks/langchain_fireworks/chat_models.py
+++ b/libs/partners/fireworks/langchain_fireworks/chat_models.py
@@ -664,8 +664,31 @@ class ChatFireworks(BaseChatModel):
         ```python
         from langchain_fireworks.chat_models import ChatFireworks
 
-        fireworks = ChatFireworks(model_name="accounts/fireworks/models/gpt-oss-120b")
+        model = ChatFireworks(model_name="accounts/fireworks/models/gpt-oss-120b")
         ```
+
+    Fireworks request headers can be passed with `extra_headers`, including
+    session-affinity headers for prompt caching and multi-turn trajectories.
+    `x-session-affinity` pins requests to a replica for prompt-cache reuse,
+    while `x-multi-turn-session-id` groups the turns of a single trajectory:
+
+    ```python
+    model.invoke(
+        "Hello",
+        extra_headers={
+            "x-session-affinity": "user-42",
+            "x-multi-turn-session-id": "thread-123",
+        },
+    )
+    ```
+
+    For prompt-cache session affinity, the Fireworks SDK also accepts a typed
+    `prompt_cache_key` field (passed as a regular keyword argument), which it
+    treats as the preferred alternative to the raw `x-session-affinity` header:
+
+    ```python
+    model.invoke("Hello", prompt_cache_key="user-42")
+    ```
     """
 
     @property
diff --git a/libs/partners/fireworks/tests/unit_tests/test_chat_models.py b/libs/partners/fireworks/tests/unit_tests/test_chat_models.py
index 693181816b0..e586411fa4a 100644
--- a/libs/partners/fireworks/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/fireworks/tests/unit_tests/test_chat_models.py
@@ -1109,6 +1109,67 @@ class TestConvertChunkToMessageChunk:
         }
 
 
+class TestExtraHeaders:
+    """Tests for request-specific HTTP header plumbing."""
+
+    @staticmethod
+    def _headers() -> dict[str, str]:
+        """Return fresh request headers with a distinct value per header."""
+        return {
+            "x-session-affinity": "user-42",
+            "x-multi-turn-session-id": "thread-123",
+        }
+
+    def test_extra_headers_forwarded_to_sync_create(self) -> None:
+        """`invoke` must hand `extra_headers` to the sync SDK client."""
+        model = _make_model()
+        model.client = MagicMock()
+        model.client.create.return_value = {
+            "choices": [{"message": {"role": "assistant", "content": "ok"}}],
+            "usage": {},
+        }
+
+        model.invoke("Hello", extra_headers=self._headers())
+
+        call_kwargs = model.client.create.call_args[1]
+        assert call_kwargs["extra_headers"] == self._headers()
+        # `extra_headers` must reach the SDK at the top level, not be folded
+        # into `extra_body` by `_prepare_sdk_kwargs`.
+        assert "extra_headers" not in call_kwargs.get("extra_body", {})
+
+    async def test_extra_headers_forwarded_to_async_create(self) -> None:
+        """`ainvoke` must hand `extra_headers` to the async SDK client."""
+        model = _make_model()
+        model.async_client = MagicMock()
+
+        async def _create(**_kwargs: Any) -> dict[str, Any]:
+            return {
+                "choices": [{"message": {"role": "assistant", "content": "ok"}}],
+                "usage": {},
+            }
+
+        model.async_client.create = MagicMock(side_effect=_create)
+
+        await model.ainvoke("Hello", extra_headers=self._headers())
+
+        call_kwargs = model.async_client.create.call_args[1]
+        assert call_kwargs["extra_headers"] == self._headers()
+        # Same top-level guarantee as the sync path.
+        assert "extra_headers" not in call_kwargs.get("extra_body", {})
+
+    def test_extra_headers_forwarded_when_streaming(self) -> None:
+        """`extra_headers` must also survive the separate streaming param path."""
+        model = _make_model()
+        model.client = MagicMock()
+        model.client.create.return_value = iter(list(_STREAM_CHUNKS))
+
+        list(model.stream("Hello", extra_headers=self._headers()))
+
+        call_kwargs = model.client.create.call_args[1]
+        assert call_kwargs["extra_headers"] == self._headers()
+        assert "extra_headers" not in call_kwargs.get("extra_body", {})
+
+
 class TestStreamUsage:
     """Tests for the `stream_usage` field and `stream_options` plumbing."""