mirror of https://github.com/hwchase17/langchain.git, synced 2025-09-17 23:41:46 +00:00
test(openai): add tests for prompt_cache_key parameter and update docs (#32363)
Introduce tests to validate the behavior and inclusion of the `prompt_cache_key` parameter in request payloads for the `ChatOpenAI` model.
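For illustration, a minimal sketch of the kind of payload test this describes. It assumes a private `_get_request_payload` helper on `ChatOpenAI` for building the request body offline and a dummy `api_key`; both are assumptions for the sketch, not documented API.

```python
"""Hypothetical test: prompt_cache_key should be forwarded into the request payload."""

from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI


def test_prompt_cache_key_in_request_payload() -> None:
    # Dummy key so the model can be constructed without real credentials.
    llm = ChatOpenAI(model="gpt-4o-mini", api_key="test")
    messages = [HumanMessage("Hello")]

    # Build the request body without hitting the network (assumed private helper)
    # and check that the per-invocation kwarg is passed through verbatim.
    payload = llm._get_request_payload(messages, stop=None, prompt_cache_key="test-key")
    assert payload["prompt_cache_key"] == "test-key"
```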
@@ -2731,6 +2731,31 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override]
    Always use ``extra_body`` for custom parameters, **not** ``model_kwargs``.
    Using ``model_kwargs`` for non-OpenAI parameters will cause API errors.

    .. dropdown:: Prompt caching optimization

        For high-volume applications with repetitive prompts, use ``prompt_cache_key``
        per invocation to improve cache hit rates and reduce costs:

        .. code-block:: python

            llm = ChatOpenAI(model="gpt-4o-mini")

            response = llm.invoke(
                messages,
                prompt_cache_key="example-key-a",  # Routes to same machine for cache hits
            )

            customer_response = llm.invoke(messages, prompt_cache_key="example-key-b")
            support_response = llm.invoke(messages, prompt_cache_key="example-key-c")

            # Dynamic cache keys based on context
            cache_key = f"example-key-{dynamic_suffix}"
            response = llm.invoke(messages, prompt_cache_key=cache_key)

        Cache keys help ensure requests with the same prompt prefix are routed to
        machines with an existing cache, reducing cost and latency on cached tokens.

    """  # noqa: E501

    max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens")
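As context for the cached-token savings the docstring mentions, here is a hedged sketch of how a cache hit could be observed from the response. It assumes OpenAI reports cached prompt tokens through LangChain's ``usage_metadata`` under ``input_token_details["cache_read"]``, and that the shared prefix is long enough to be eligible for server-side caching.

```python
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")
messages = [HumanMessage("<a long, shared prompt prefix>... plus the user question")]

# Two invocations sharing the same cache key; the second is expected to hit the
# server-side prompt cache once the shared prefix exceeds the caching threshold.
first = llm.invoke(messages, prompt_cache_key="example-key-a")
second = llm.invoke(messages, prompt_cache_key="example-key-a")

# Cached prompt tokens are surfaced (assumed field names) under input_token_details.
details = (second.usage_metadata or {}).get("input_token_details", {})
print("cached prompt tokens:", details.get("cache_read", 0))
```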