mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-12 00:11:17 +00:00
fix(anthropic): remove beta header warning for TTL (#32832)
No longer beta as of Aug 13
This commit is contained in:
@@ -999,24 +999,27 @@ class ChatAnthropic(BaseChatModel):
|
|||||||
|
|
||||||
.. dropdown:: Extended caching
|
.. dropdown:: Extended caching
|
||||||
|
|
||||||
.. versionadded:: 0.3.15
|
|
||||||
|
|
||||||
The cache lifetime is 5 minutes by default. If this is too short, you can
|
The cache lifetime is 5 minutes by default. If this is too short, you can
|
||||||
apply one hour caching by enabling the ``'extended-cache-ttl-2025-04-11'``
|
apply one hour caching by setting ``ttl`` to ``'1h'``.
|
||||||
beta header:
|
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
llm = ChatAnthropic(
|
llm = ChatAnthropic(
|
||||||
model="claude-3-7-sonnet-20250219",
|
model="claude-3-7-sonnet-20250219",
|
||||||
betas=["extended-cache-ttl-2025-04-11"],
|
|
||||||
)
|
)
|
||||||
|
|
||||||
and specifying ``"cache_control": {"type": "ephemeral", "ttl": "1h"}``.
|
messages = [{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": f"{long_text}",
|
||||||
|
"cache_control": {"type": "ephemeral", "ttl": "1h"},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}]
|
||||||
|
|
||||||
.. important::
|
response = llm.invoke(messages)
|
||||||
Specifying a `ttl` key under `cache_control` will not work unless the
|
|
||||||
beta header is set!
|
|
||||||
|
|
||||||
Details of cached token counts will be included on the ``InputTokenDetails``
|
Details of cached token counts will be included on the ``InputTokenDetails``
|
||||||
of response's ``usage_metadata``:
|
of response's ``usage_metadata``:
|
||||||
@@ -1432,23 +1435,6 @@ class ChatAnthropic(BaseChatModel):
|
|||||||
# If cache_control is provided in kwargs, add it to last message
|
# If cache_control is provided in kwargs, add it to last message
|
||||||
# and content block.
|
# and content block.
|
||||||
if "cache_control" in kwargs and formatted_messages:
|
if "cache_control" in kwargs and formatted_messages:
|
||||||
cache_control = kwargs["cache_control"]
|
|
||||||
|
|
||||||
# Validate TTL usage requires extended cache TTL beta header
|
|
||||||
if (
|
|
||||||
isinstance(cache_control, dict)
|
|
||||||
and "ttl" in cache_control
|
|
||||||
and (
|
|
||||||
not self.betas or "extended-cache-ttl-2025-04-11" not in self.betas
|
|
||||||
)
|
|
||||||
):
|
|
||||||
msg = (
|
|
||||||
"Specifying a 'ttl' under 'cache_control' requires enabling "
|
|
||||||
"the 'extended-cache-ttl-2025-04-11' beta header. "
|
|
||||||
"Set betas=['extended-cache-ttl-2025-04-11'] when initializing "
|
|
||||||
"ChatAnthropic."
|
|
||||||
)
|
|
||||||
warnings.warn(msg, stacklevel=2)
|
|
||||||
if isinstance(formatted_messages[-1]["content"], list):
|
if isinstance(formatted_messages[-1]["content"], list):
|
||||||
formatted_messages[-1]["content"][-1]["cache_control"] = kwargs.pop(
|
formatted_messages[-1]["content"][-1]["cache_control"] = kwargs.pop(
|
||||||
"cache_control"
|
"cache_control"
|
||||||
|
Reference in New Issue
Block a user