diff --git a/docs/docs/integrations/chat/anthropic.ipynb b/docs/docs/integrations/chat/anthropic.ipynb index d626b4a70fe..cbe5490668b 100644 --- a/docs/docs/integrations/chat/anthropic.ipynb +++ b/docs/docs/integrations/chat/anthropic.ipynb @@ -1191,6 +1191,40 @@ "response.content" ] }, + { + "cell_type": "markdown", + "id": "74247a07-b153-444f-9c56-77659aeefc88", + "metadata": {}, + "source": [ + "## Context management\n", + "\n", + "Anthropic supports a context editing feature that will automatically manage the model's context window (e.g., by clearing tool results).\n", + "\n", + "See [Anthropic documentation](https://docs.claude.com/en/docs/build-with-claude/context-editing) for details and configuration options.\n", + "\n", + ":::info\n", + "Requires ``langchain-anthropic>=0.3.21``\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbb79c5d-37b5-4212-b36f-f27366192cf9", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_anthropic import ChatAnthropic\n", + "\n", + "llm = ChatAnthropic(\n", + " model=\"claude-sonnet-4-5-20250929\",\n", + " betas=[\"context-management-2025-06-27\"],\n", + " context_management={\"edits\": [{\"type\": \"clear_tool_uses_20250919\"}]},\n", + ")\n", + "llm_with_tools = llm.bind_tools([{\"type\": \"web_search_20250305\", \"name\": \"web_search\"}])\n", + "response = llm_with_tools.invoke(\"Search for recent developments in AI\")" + ] + }, { "cell_type": "markdown", "id": "cbfec7a9-d9df-4d12-844e-d922456dd9bf", @@ -1457,6 +1491,38 @@ "" ] }, + { + "cell_type": "markdown", + "id": "29405da2-d2ef-415c-b674-6e29073cd05e", + "metadata": {}, + "source": [ + "### Memory tool\n", + "\n", + "Claude supports a memory tool for client-side storage and retrieval of context across conversational threads. See docs [here](https://docs.claude.com/en/docs/agents-and-tools/tool-use/memory-tool) for details.\n", + "\n", + ":::info\n", + "Requires ``langchain-anthropic>=0.3.21``\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbd76eaa-041f-4fb8-8346-ca8fe0001c01", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_anthropic import ChatAnthropic\n", + "\n", + "llm = ChatAnthropic(\n", + " model=\"claude-sonnet-4-5-20250929\",\n", + " betas=[\"context-management-2025-06-27\"],\n", + ")\n", + "llm_with_tools = llm.bind_tools([{\"type\": \"memory_20250818\", \"name\": \"memory\"}])\n", + "\n", + "response = llm_with_tools.invoke(\"What are my interests?\")" + ] + }, { "cell_type": "markdown", "id": "040f381a-1768-479a-9a5e-aa2d7d77e0d5", diff --git a/libs/langchain/langchain/chat_models/base.py b/libs/langchain/langchain/chat_models/base.py index 0939d520d37..285e4f18b62 100644 --- a/libs/langchain/langchain/chat_models/base.py +++ b/libs/langchain/langchain/chat_models/base.py @@ -118,7 +118,7 @@ def init_chat_model( Will attempt to infer model_provider from model if not specified. 
The following providers will be inferred based on these model prefixes: - - ``gpt-3...`` | ``gpt-4...`` | ``o1...`` -> ``openai`` + - ``gpt-...`` | ``o1...`` | ``o3...`` -> ``openai`` - ``claude...`` -> ``anthropic`` - ``amazon...`` -> ``bedrock`` - ``gemini...`` -> ``google_vertexai`` @@ -497,7 +497,7 @@ _SUPPORTED_PROVIDERS = { def _attempt_infer_model_provider(model_name: str) -> Optional[str]: - if any(model_name.startswith(pre) for pre in ("gpt-3", "gpt-4", "o1", "o3")): + if any(model_name.startswith(pre) for pre in ("gpt-", "o1", "o3")): return "openai" if model_name.startswith("claude"): return "anthropic" diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py index b67ff41a916..6a245318db8 100644 --- a/libs/langchain/tests/unit_tests/chat_models/test_base.py +++ b/libs/langchain/tests/unit_tests/chat_models/test_base.py @@ -270,6 +270,7 @@ def test_configurable_with_default() -> None: "stop_sequences": None, "anthropic_api_url": "https://api.anthropic.com", "anthropic_proxy": None, + "context_management": None, "anthropic_api_key": SecretStr("bar"), "betas": None, "default_headers": None, diff --git a/libs/langchain_v1/langchain/agents/middleware/__init__.py b/libs/langchain_v1/langchain/agents/middleware/__init__.py index 014989516ae..64d06e85445 100644 --- a/libs/langchain_v1/langchain/agents/middleware/__init__.py +++ b/libs/langchain_v1/langchain/agents/middleware/__init__.py @@ -1,6 +1,7 @@ """Middleware plugins for agents.""" from .human_in_the_loop import HumanInTheLoopMiddleware +from .planning import PlanningMiddleware from .prompt_caching import AnthropicPromptCachingMiddleware from .summarization import SummarizationMiddleware from .types import AgentMiddleware, AgentState, ModelRequest @@ -12,5 +13,6 @@ __all__ = [ "AnthropicPromptCachingMiddleware", "HumanInTheLoopMiddleware", "ModelRequest", + "PlanningMiddleware", "SummarizationMiddleware", ] diff --git a/libs/langchain_v1/langchain/agents/middleware/planning.py b/libs/langchain_v1/langchain/agents/middleware/planning.py new file mode 100644 index 00000000000..5fb451dcf55 --- /dev/null +++ b/libs/langchain_v1/langchain/agents/middleware/planning.py @@ -0,0 +1,197 @@ +"""Planning and task management middleware for agents.""" +# ruff: noqa: E501 + +from __future__ import annotations + +from typing import Annotated, Literal + +from langchain_core.messages import ToolMessage +from langchain_core.tools import tool +from langgraph.types import Command +from typing_extensions import NotRequired, TypedDict + +from langchain.agents.middleware.types import AgentMiddleware, AgentState, ModelRequest +from langchain.tools import InjectedToolCallId + + +class Todo(TypedDict): + """A single todo item with content and status.""" + + content: str + """The content/description of the todo item.""" + + status: Literal["pending", "in_progress", "completed"] + """The current status of the todo item.""" + + +class PlanningState(AgentState): + """State schema for the todo middleware.""" + + todos: NotRequired[list[Todo]] + """List of todo items for tracking task progress.""" + + +WRITE_TODOS_TOOL_DESCRIPTION = """Use this tool to create and manage a structured task list for your current work session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user. + +Only use this tool if you think it will be helpful in staying organized. 
If the user's request is trivial and takes less than 3 steps, it is better to NOT use this tool and just do the task directly.
+
+## When to Use This Tool
+Use this tool in these scenarios:
+
+1. Complex multi-step tasks - When a task requires 3 or more distinct steps or actions
+2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations
+3. User explicitly requests todo list - When the user directly asks you to use the todo list
+4. User provides multiple tasks - When users provide a list of things to be done (numbered or comma-separated)
+5. The plan may need future revisions or updates based on results from the first few steps
+
+## How to Use This Tool
+1. When you start working on a task - Mark it as in_progress BEFORE beginning work.
+2. After completing a task - Mark it as completed and add any new follow-up tasks discovered during implementation.
+3. You can also update future tasks, such as deleting them if they are no longer necessary, or adding new tasks that are necessary. Don't change previously completed tasks.
+4. You can make several updates to the todo list at once. For example, when you complete a task, you can mark the next task you need to start as in_progress.
+
+## When NOT to Use This Tool
+It is important to skip using this tool when:
+1. There is only a single, straightforward task
+2. The task is trivial and tracking it provides no benefit
+3. The task can be completed in less than 3 trivial steps
+4. The task is purely conversational or informational
+
+## Task States and Management
+
+1. **Task States**: Use these states to track progress:
+   - pending: Task not yet started
+   - in_progress: Currently working on (you can have multiple tasks in_progress at a time if they are not related to each other and can be run in parallel)
+   - completed: Task finished successfully
+
+2. **Task Management**:
+   - Update task status in real-time as you work
+   - Mark tasks complete IMMEDIATELY after finishing (don't batch completions)
+   - Complete current tasks before starting new ones
+   - Remove tasks that are no longer relevant from the list entirely
+   - IMPORTANT: When you write this todo list, you should mark your first task (or tasks) as in_progress immediately!
+   - IMPORTANT: Unless all tasks are completed, you should always have at least one task in_progress to show the user that you are working on something.
+
+3. **Task Completion Requirements**:
+   - ONLY mark a task as completed when you have FULLY accomplished it
+   - If you encounter errors, blockers, or cannot finish, keep the task as in_progress
+   - When blocked, create a new task describing what needs to be resolved
+   - Never mark a task as completed if:
+     - There are unresolved issues or errors
+     - Work is partial or incomplete
+     - You encountered blockers that prevent completion
+     - You couldn't find necessary resources or dependencies
+     - Quality standards haven't been met
+
+4. **Task Breakdown**:
+   - Create specific, actionable items
+   - Break complex tasks into smaller, manageable steps
+   - Use clear, descriptive task names
+
+Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully.
+Remember: If you only need to make a few tool calls to complete a task, and it is clear what you need to do, it is better to just do the task directly and NOT call this tool at all."""
+
+WRITE_TODOS_SYSTEM_PROMPT = """## `write_todos`
+
+You have access to the `write_todos` tool to help you manage and plan complex objectives.
Use this tool for complex objectives to ensure that you are tracking each necessary step and giving the user visibility into your progress.
+This tool is very helpful for planning complex objectives, and for breaking down these larger complex objectives into smaller steps.
+
+It is critical that you mark todos as completed as soon as you are done with a step. Do not batch up multiple steps before marking them as completed.
+For simple objectives that only require a few steps, it is better to just complete the objective directly and NOT use this tool.
+Writing todos takes time and tokens; use it when it is helpful for managing complex many-step problems, but not for simple few-step requests.
+
+## Important To-Do List Usage Notes to Remember
+- The `write_todos` tool should never be called multiple times in parallel.
+- Don't be afraid to revise the To-Do list as you go. New information may reveal new tasks that need to be done, or old tasks that are irrelevant."""
+
+
+@tool(description=WRITE_TODOS_TOOL_DESCRIPTION)
+def write_todos(todos: list[Todo], tool_call_id: Annotated[str, InjectedToolCallId]) -> Command:
+    """Create and manage a structured task list for your current work session."""
+    return Command(
+        update={
+            "todos": todos,
+            "messages": [ToolMessage(f"Updated todo list to {todos}", tool_call_id=tool_call_id)],
+        }
+    )
+
+
+class PlanningMiddleware(AgentMiddleware):
+    """Middleware that provides todo list management capabilities to agents.
+
+    This middleware adds a `write_todos` tool that allows agents to create and manage
+    structured task lists for complex multi-step operations. It's designed to help
+    agents track progress, organize complex tasks, and provide users with visibility
+    into task completion status.
+
+    The middleware automatically injects system prompts that guide the agent on when
+    and how to use the todo functionality effectively.
+
+    Example:
+        ```python
+        from langchain.agents.middleware.planning import PlanningMiddleware
+        from langchain.agents import create_agent
+        from langchain_core.messages import HumanMessage
+
+        agent = create_agent("openai:gpt-4o", middleware=[PlanningMiddleware()])
+
+        # Agent now has access to the write_todos tool and todo state tracking
+        result = agent.invoke({"messages": [HumanMessage("Help me refactor my codebase")]})
+
+        print(result["todos"])  # List of todo items with status tracking
+        ```
+
+    Args:
+        system_prompt: Custom system prompt to guide the agent on using the todo tool.
+            If not provided, uses the default ``WRITE_TODOS_SYSTEM_PROMPT``.
+        tool_description: Custom description for the write_todos tool.
+            If not provided, uses the default ``WRITE_TODOS_TOOL_DESCRIPTION``.
+    """
+
+    state_schema = PlanningState
+
+    def __init__(
+        self,
+        *,
+        system_prompt: str = WRITE_TODOS_SYSTEM_PROMPT,
+        tool_description: str = WRITE_TODOS_TOOL_DESCRIPTION,
+    ) -> None:
+        """Initialize the PlanningMiddleware with optional custom prompts.
+
+        Args:
+            system_prompt: Custom system prompt to guide the agent on using the todo tool.
+            tool_description: Custom description for the write_todos tool.
+ """ + super().__init__() + self.system_prompt = system_prompt + self.tool_description = tool_description + + # Dynamically create the write_todos tool with the custom description + @tool(description=self.tool_description) + def write_todos( + todos: list[Todo], tool_call_id: Annotated[str, InjectedToolCallId] + ) -> Command: + """Create and manage a structured task list for your current work session.""" + return Command( + update={ + "todos": todos, + "messages": [ + ToolMessage(f"Updated todo list to {todos}", tool_call_id=tool_call_id) + ], + } + ) + + self.tools = [write_todos] + + def modify_model_request( # type: ignore[override] + self, + request: ModelRequest, + state: PlanningState, # noqa: ARG002 + ) -> ModelRequest: + """Update the system prompt to include the todo system prompt.""" + request.system_prompt = ( + request.system_prompt + "\n\n" + self.system_prompt + if request.system_prompt + else self.system_prompt + ) + return request diff --git a/libs/langchain_v1/langchain/chat_models/base.py b/libs/langchain_v1/langchain/chat_models/base.py index a062d348230..765cffb4d10 100644 --- a/libs/langchain_v1/langchain/chat_models/base.py +++ b/libs/langchain_v1/langchain/chat_models/base.py @@ -109,7 +109,7 @@ def init_chat_model( Will attempt to infer model_provider from model if not specified. The following providers will be inferred based on these model prefixes: - - 'gpt-3...' | 'gpt-4...' | 'o1...' -> 'openai' + - 'gpt-...' | 'o1...' | 'o3...' -> 'openai' - 'claude...' -> 'anthropic' - 'amazon....' -> 'bedrock' - 'gemini...' -> 'google_vertexai' @@ -474,7 +474,7 @@ _SUPPORTED_PROVIDERS = { def _attempt_infer_model_provider(model_name: str) -> str | None: - if any(model_name.startswith(pre) for pre in ("gpt-3", "gpt-4", "o1", "o3")): + if any(model_name.startswith(pre) for pre in ("gpt-", "o1", "o3")): return "openai" if model_name.startswith("claude"): return "anthropic" diff --git a/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py b/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py index 519c99e9a0d..62d4addccb2 100644 --- a/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py +++ b/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py @@ -1,14 +1,9 @@ -import pytest +import warnings +from types import ModuleType from typing import Any from unittest.mock import patch -from types import ModuleType -from syrupy.assertion import SnapshotAssertion - -import warnings -from langgraph.runtime import Runtime -from typing_extensions import Annotated -from pydantic import BaseModel, Field +import pytest from langchain_core.language_models import BaseChatModel from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import ( @@ -18,31 +13,42 @@ from langchain_core.messages import ( ToolCall, ToolMessage, ) -from langchain_core.tools import tool, InjectedToolCallId +from langchain_core.outputs import ChatGeneration, ChatResult +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.checkpoint.base import BaseCheckpointSaver +from langgraph.checkpoint.memory import InMemorySaver +from langgraph.constants import END +from langgraph.graph.message import REMOVE_ALL_MESSAGES +from langgraph.runtime import Runtime +from langgraph.types import Command +from pydantic import BaseModel, Field +from syrupy.assertion import SnapshotAssertion +from typing_extensions import Annotated -from langchain.agents.middleware_agent import create_agent -from langchain.tools import 
InjectedState from langchain.agents.middleware.human_in_the_loop import ( - HumanInTheLoopMiddleware, ActionRequest, + HumanInTheLoopMiddleware, +) +from langchain.agents.middleware.planning import ( + PlanningMiddleware, + PlanningState, + WRITE_TODOS_SYSTEM_PROMPT, + write_todos, + WRITE_TODOS_TOOL_DESCRIPTION, ) from langchain.agents.middleware.prompt_caching import AnthropicPromptCachingMiddleware from langchain.agents.middleware.summarization import SummarizationMiddleware from langchain.agents.middleware.types import ( AgentMiddleware, - ModelRequest, AgentState, + ModelRequest, OmitFromInput, OmitFromOutput, PrivateStateAttr, ) - -from langgraph.checkpoint.base import BaseCheckpointSaver -from langgraph.checkpoint.memory import InMemorySaver -from langgraph.constants import END -from langgraph.graph.message import REMOVE_ALL_MESSAGES -from langgraph.types import Command +from langchain.agents.middleware_agent import create_agent from langchain.agents.structured_output import ToolStrategy +from langchain.tools import InjectedState from .messages import _AnyIdHumanMessage, _AnyIdToolMessage from .model import FakeToolCallingModel @@ -1105,14 +1111,9 @@ def test_summarization_middleware_summary_creation() -> None: class MockModel(BaseChatModel): def invoke(self, prompt): - from langchain_core.messages import AIMessage - return AIMessage(content="Generated summary") def _generate(self, messages, **kwargs): - from langchain_core.outputs import ChatResult, ChatGeneration - from langchain_core.messages import AIMessage - return ChatResult(generations=[ChatGeneration(message=AIMessage(content="Summary"))]) @property @@ -1136,9 +1137,6 @@ def test_summarization_middleware_summary_creation() -> None: raise Exception("Model error") def _generate(self, messages, **kwargs): - from langchain_core.outputs import ChatResult, ChatGeneration - from langchain_core.messages import AIMessage - return ChatResult(generations=[ChatGeneration(message=AIMessage(content="Summary"))]) @property @@ -1155,14 +1153,9 @@ def test_summarization_middleware_full_workflow() -> None: class MockModel(BaseChatModel): def invoke(self, prompt): - from langchain_core.messages import AIMessage - return AIMessage(content="Generated summary") def _generate(self, messages, **kwargs): - from langchain_core.outputs import ChatResult, ChatGeneration - from langchain_core.messages import AIMessage - return ChatResult(generations=[ChatGeneration(message=AIMessage(content="Summary"))]) @property @@ -1423,3 +1416,248 @@ def test_jump_to_is_ephemeral() -> None: agent = agent.compile() result = agent.invoke({"messages": [HumanMessage("Hello")]}) assert "jump_to" not in result + + +# Tests for PlanningMiddleware +def test_planning_middleware_initialization() -> None: + """Test that PlanningMiddleware initializes correctly.""" + middleware = PlanningMiddleware() + assert middleware.state_schema == PlanningState + assert len(middleware.tools) == 1 + assert middleware.tools[0].name == "write_todos" + + +@pytest.mark.parametrize( + "original_prompt,expected_prompt_prefix", + [ + ("Original prompt", "Original prompt\n\n## `write_todos`"), + (None, "## `write_todos`"), + ], +) +def test_planning_middleware_modify_model_request(original_prompt, expected_prompt_prefix) -> None: + """Test that modify_model_request handles system prompts correctly.""" + middleware = PlanningMiddleware() + model = FakeToolCallingModel() + + request = ModelRequest( + model=model, + system_prompt=original_prompt, + messages=[HumanMessage(content="Hello")], + 
tool_choice=None, + tools=[], + response_format=None, + model_settings={}, + ) + + state: PlanningState = {"messages": [HumanMessage(content="Hello")]} + modified_request = middleware.modify_model_request(request, state) + assert modified_request.system_prompt.startswith(expected_prompt_prefix) + + +@pytest.mark.parametrize( + "todos,expected_message", + [ + ([], "Updated todo list to []"), + ( + [{"content": "Task 1", "status": "pending"}], + "Updated todo list to [{'content': 'Task 1', 'status': 'pending'}]", + ), + ( + [ + {"content": "Task 1", "status": "pending"}, + {"content": "Task 2", "status": "in_progress"}, + ], + "Updated todo list to [{'content': 'Task 1', 'status': 'pending'}, {'content': 'Task 2', 'status': 'in_progress'}]", + ), + ( + [ + {"content": "Task 1", "status": "pending"}, + {"content": "Task 2", "status": "in_progress"}, + {"content": "Task 3", "status": "completed"}, + ], + "Updated todo list to [{'content': 'Task 1', 'status': 'pending'}, {'content': 'Task 2', 'status': 'in_progress'}, {'content': 'Task 3', 'status': 'completed'}]", + ), + ], +) +def test_planning_middleware_write_todos_tool_execution(todos, expected_message) -> None: + """Test that the write_todos tool executes correctly.""" + tool_call = { + "args": {"todos": todos}, + "name": "write_todos", + "type": "tool_call", + "id": "test_call", + } + result = write_todos.invoke(tool_call) + assert result.update["todos"] == todos + assert result.update["messages"][0].content == expected_message + + +@pytest.mark.parametrize( + "invalid_todos", + [ + [{"content": "Task 1", "status": "invalid_status"}], + [{"status": "pending"}], + ], +) +def test_planning_middleware_write_todos_tool_validation_errors(invalid_todos) -> None: + """Test that the write_todos tool rejects invalid input.""" + tool_call = { + "args": {"todos": invalid_todos}, + "name": "write_todos", + "type": "tool_call", + "id": "test_call", + } + with pytest.raises(Exception): + write_todos.invoke(tool_call) + + +def test_planning_middleware_agent_creation_with_middleware() -> None: + """Test that an agent can be created with the planning middleware.""" + model = FakeToolCallingModel( + tool_calls=[ + [ + { + "args": {"todos": [{"content": "Task 1", "status": "pending"}]}, + "name": "write_todos", + "type": "tool_call", + "id": "test_call", + } + ], + [ + { + "args": {"todos": [{"content": "Task 1", "status": "in_progress"}]}, + "name": "write_todos", + "type": "tool_call", + "id": "test_call", + } + ], + [ + { + "args": {"todos": [{"content": "Task 1", "status": "completed"}]}, + "name": "write_todos", + "type": "tool_call", + "id": "test_call", + } + ], + [], + ] + ) + middleware = PlanningMiddleware() + agent = create_agent(model=model, middleware=[middleware]) + agent = agent.compile() + + result = agent.invoke({"messages": [HumanMessage("Hello")]}) + assert result["todos"] == [{"content": "Task 1", "status": "completed"}] + + # human message (1) + # ai message (2) - initial todo + # tool message (3) + # ai message (4) - updated todo + # tool message (5) + # ai message (6) - complete todo + # tool message (7) + # ai message (8) - no tool calls + assert len(result["messages"]) == 8 + + +def test_planning_middleware_custom_system_prompt() -> None: + """Test that PlanningMiddleware can be initialized with custom system prompt.""" + custom_system_prompt = "Custom todo system prompt for testing" + middleware = PlanningMiddleware(system_prompt=custom_system_prompt) + model = FakeToolCallingModel() + + request = ModelRequest( + model=model, + 
system_prompt="Original prompt", + messages=[HumanMessage(content="Hello")], + tool_choice=None, + tools=[], + response_format=None, + model_settings={}, + ) + + state: PlanningState = {"messages": [HumanMessage(content="Hello")]} + modified_request = middleware.modify_model_request(request, state) + assert modified_request.system_prompt == f"Original prompt\n\n{custom_system_prompt}" + + +def test_planning_middleware_custom_tool_description() -> None: + """Test that PlanningMiddleware can be initialized with custom tool description.""" + custom_tool_description = "Custom tool description for testing" + middleware = PlanningMiddleware(tool_description=custom_tool_description) + + assert len(middleware.tools) == 1 + tool = middleware.tools[0] + assert tool.description == custom_tool_description + + +def test_planning_middleware_custom_system_prompt_and_tool_description() -> None: + """Test that PlanningMiddleware can be initialized with both custom prompts.""" + custom_system_prompt = "Custom system prompt" + custom_tool_description = "Custom tool description" + middleware = PlanningMiddleware( + system_prompt=custom_system_prompt, + tool_description=custom_tool_description, + ) + + # Verify system prompt + model = FakeToolCallingModel() + request = ModelRequest( + model=model, + system_prompt=None, + messages=[HumanMessage(content="Hello")], + tool_choice=None, + tools=[], + response_format=None, + model_settings={}, + ) + + state: PlanningState = {"messages": [HumanMessage(content="Hello")]} + modified_request = middleware.modify_model_request(request, state) + assert modified_request.system_prompt == custom_system_prompt + + # Verify tool description + assert len(middleware.tools) == 1 + tool = middleware.tools[0] + assert tool.description == custom_tool_description + + +def test_planning_middleware_default_prompts() -> None: + """Test that PlanningMiddleware uses default prompts when none provided.""" + middleware = PlanningMiddleware() + + # Verify default system prompt + assert middleware.system_prompt == WRITE_TODOS_SYSTEM_PROMPT + + # Verify default tool description + assert middleware.tool_description == WRITE_TODOS_TOOL_DESCRIPTION + assert len(middleware.tools) == 1 + tool = middleware.tools[0] + assert tool.description == WRITE_TODOS_TOOL_DESCRIPTION + + +def test_planning_middleware_custom_system_prompt() -> None: + """Test that custom tool executes correctly in an agent.""" + middleware = PlanningMiddleware(system_prompt="call the write_todos tool") + + model = FakeToolCallingModel( + tool_calls=[ + [ + { + "args": {"todos": [{"content": "Custom task", "status": "pending"}]}, + "name": "write_todos", + "type": "tool_call", + "id": "test_call", + } + ], + [], + ] + ) + + agent = create_agent(model=model, middleware=[middleware]) + agent = agent.compile() + + result = agent.invoke({"messages": [HumanMessage("Hello")]}) + assert result["todos"] == [{"content": "Custom task", "status": "pending"}] + # assert custom system prompt is in the first AI message + assert "call the write_todos tool" in result["messages"][1].content diff --git a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py index 0bcef4d0201..d7b03ac1eab 100644 --- a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py +++ b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py @@ -267,6 +267,7 @@ def test_configurable_with_default() -> None: "stop_sequences": None, "anthropic_api_url": 
"https://api.anthropic.com", "anthropic_proxy": None, + "context_management": None, "anthropic_api_key": SecretStr("bar"), "betas": None, "default_headers": None, diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index d7e617f3083..670c50fe0ff 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -12,7 +12,6 @@ from operator import itemgetter from typing import Any, Callable, Final, Literal, Optional, Union, cast import anthropic -from langchain_core._api import beta from langchain_core.callbacks import ( AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun, @@ -119,6 +118,7 @@ def _is_builtin_tool(tool: Any) -> bool: "web_search_", "web_fetch_", "code_execution_", + "memory_", ] return any(tool_type.startswith(prefix) for prefix in _builtin_tool_prefixes) @@ -1251,6 +1251,25 @@ class ChatAnthropic(BaseChatModel): Total tokens: 408 + Context management: + Anthropic supports a context editing feature that will automatically manage the + model's context window (e.g., by clearing tool results). + + See `Anthropic documentation `__ + for details and configuration options. + + .. code-block:: python + + from langchain_anthropic import ChatAnthropic + + llm = ChatAnthropic( + model="claude-sonnet-4-5-20250929", + betas=["context-management-2025-06-27"], + context_management={"edits": [{"type": "clear_tool_uses_20250919"}]}, + ) + llm_with_tools = llm.bind_tools([{"type": "web_search_20250305", "name": "web_search"}]) + response = llm_with_tools.invoke("Search for recent developments in AI") + Built-in tools: See LangChain `docs `__ for more detail. @@ -1364,6 +1383,19 @@ class ChatAnthropic(BaseChatModel): 'id': 'toolu_01VdNgt1YV7kGfj9LFLm6HyQ', 'type': 'tool_call'}] + .. dropdown:: Memory tool + + .. code-block:: python + + from langchain_anthropic import ChatAnthropic + + llm = ChatAnthropic( + model="claude-sonnet-4-5-20250929", + betas=["context-management-2025-06-27"], + ) + llm_with_tools = llm.bind_tools([{"type": "memory_20250818", "name": "memory"}]) + response = llm_with_tools.invoke("What are my interests?") + Response metadata .. code-block:: python @@ -1471,6 +1503,11 @@ class ChatAnthropic(BaseChatModel): "name": "example-mcp"}]`` """ + context_management: Optional[dict[str, Any]] = None + """Configuration for + `context management `__. + """ + @property def _llm_type(self) -> str: """Return type of chat model.""" @@ -1659,6 +1696,7 @@ class ChatAnthropic(BaseChatModel): "top_p": self.top_p, "stop_sequences": stop or self.stop_sequences, "betas": self.betas, + "context_management": self.context_management, "mcp_servers": self.mcp_servers, "system": system, **self.model_kwargs, @@ -2318,7 +2356,6 @@ class ChatAnthropic(BaseChatModel): return RunnableMap(raw=llm) | parser_with_fallback return llm | output_parser - @beta() def get_num_tokens_from_messages( self, messages: list[BaseMessage], @@ -2333,8 +2370,8 @@ class ChatAnthropic(BaseChatModel): messages: The message inputs to tokenize. tools: If provided, sequence of dict, BaseModel, function, or BaseTools to be converted to tool schemas. - kwargs: Additional keyword arguments are passed to the - :meth:`~langchain_anthropic.chat_models.ChatAnthropic.bind` method. + kwargs: Additional keyword arguments are passed to the Anthropic + ``messages.count_tokens`` method. 
Basic usage:

@@ -2369,7 +2406,7 @@ class ChatAnthropic(BaseChatModel):
                 def get_weather(location: str) -> str:
                     \"\"\"Get the current weather in a given location

-                    Args:
+                    Args:
                         location: The city and state, e.g. San Francisco, CA
                     \"\"\"
                     return "Sunny"
@@ -2387,15 +2424,24 @@ class ChatAnthropic(BaseChatModel):
         Uses Anthropic's `token counting API <https://docs.anthropic.com/en/docs/build-with-claude/token-counting>`__ to count tokens in messages.

-        """  # noqa: E501
+        """  # noqa: D214,E501
         formatted_system, formatted_messages = _format_messages(messages)
         if isinstance(formatted_system, str):
             kwargs["system"] = formatted_system
         if tools:
             kwargs["tools"] = [convert_to_anthropic_tool(tool) for tool in tools]
+        if self.context_management is not None:
+            kwargs["context_management"] = self.context_management

-        response = self._client.beta.messages.count_tokens(
-            betas=["token-counting-2024-11-01"],
+        if self.betas is not None:
+            beta_response = self._client.beta.messages.count_tokens(
+                betas=self.betas,
+                model=self.model,
+                messages=formatted_messages,  # type: ignore[arg-type]
+                **kwargs,
+            )
+            return beta_response.input_tokens
+        response = self._client.messages.count_tokens(
             model=self.model,
             messages=formatted_messages,  # type: ignore[arg-type]
             **kwargs,
@@ -2508,7 +2554,7 @@ def _make_message_chunk_from_anthropic_event(
         # Capture model name, but don't include usage_metadata yet
         # as it will be properly reported in message_delta with complete info
         if hasattr(event.message, "model"):
-            response_metadata = {"model_name": event.message.model}
+            response_metadata: dict[str, Any] = {"model_name": event.message.model}
         else:
             response_metadata = {}
@@ -2609,13 +2655,16 @@ def _make_message_chunk_from_anthropic_event(
     # Process final usage metadata and completion info
     elif event.type == "message_delta" and stream_usage:
         usage_metadata = _create_usage_metadata(event.usage)
+        response_metadata = {
+            "stop_reason": event.delta.stop_reason,
+            "stop_sequence": event.delta.stop_sequence,
+        }
+        if context_management := getattr(event, "context_management", None):
+            response_metadata["context_management"] = context_management.model_dump()
         message_chunk = AIMessageChunk(
             content="" if coerce_content_to_string else [],
             usage_metadata=usage_metadata,
-            response_metadata={
-                "stop_reason": event.delta.stop_reason,
-                "stop_sequence": event.delta.stop_sequence,
-            },
+            response_metadata=response_metadata,
         )
         if message_chunk.response_metadata.get("stop_reason"):
             # Mark final Anthropic stream chunk
diff --git a/libs/partners/anthropic/pyproject.toml b/libs/partners/anthropic/pyproject.toml
index 9884ffec27b..b69915a8d1b 100644
--- a/libs/partners/anthropic/pyproject.toml
+++ b/libs/partners/anthropic/pyproject.toml
@@ -7,7 +7,7 @@ authors = []
 license = { text = "MIT" }
 requires-python = ">=3.10.0,<4.0.0"
 dependencies = [
-    "anthropic>=0.67.0,<1.0.0",
+    "anthropic>=0.69.0,<1.0.0",
     "langchain-core>=1.0.0a1,<2.0.0",
     "pydantic>=2.7.4,<3.0.0",
 ]
diff --git a/libs/partners/anthropic/tests/cassettes/test_context_management.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_context_management.yaml.gz
new file mode 100644
index 00000000000..096e6a0dd53
Binary files /dev/null and b/libs/partners/anthropic/tests/cassettes/test_context_management.yaml.gz differ
diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
index 339b5d5b78e..ec370d797f8 100644
--- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py
@@ -1761,6 +1761,50 @@ def test_search_result_top_level() -> None: ) +def test_memory_tool() -> None: + llm = ChatAnthropic( + model="claude-sonnet-4-5-20250929", # type: ignore[call-arg] + betas=["context-management-2025-06-27"], + ) + llm_with_tools = llm.bind_tools([{"type": "memory_20250818", "name": "memory"}]) + response = llm_with_tools.invoke("What are my interests?") + assert isinstance(response, AIMessage) + assert response.tool_calls + assert response.tool_calls[0]["name"] == "memory" + + +@pytest.mark.vcr +def test_context_management() -> None: + # TODO: update example to trigger action + llm = ChatAnthropic( + model="claude-sonnet-4-5-20250929", # type: ignore[call-arg] + betas=["context-management-2025-06-27"], + context_management={ + "edits": [ + { + "type": "clear_tool_uses_20250919", + "trigger": {"type": "input_tokens", "value": 10}, + "clear_at_least": {"type": "input_tokens", "value": 5}, + } + ] + }, + ) + llm_with_tools = llm.bind_tools( + [{"type": "web_search_20250305", "name": "web_search"}] + ) + input_message = {"role": "user", "content": "Search for recent developments in AI"} + response = llm_with_tools.invoke([input_message]) + assert response.response_metadata.get("context_management") + + # Test streaming + full: Optional[BaseMessageChunk] = None + for chunk in llm_with_tools.stream([input_message]): + assert isinstance(chunk, AIMessageChunk) + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.response_metadata.get("context_management") + + def test_async_shared_client() -> None: llm = ChatAnthropic(model="claude-3-5-haiku-latest") # type: ignore[call-arg] _ = asyncio.run(llm.ainvoke("Hello")) diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py index 9a904b868b4..043376b05c3 100644 --- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py @@ -1283,9 +1283,21 @@ def test_get_num_tokens_from_messages_passes_kwargs() -> None: with patch.object(anthropic, "Client") as _client: llm.get_num_tokens_from_messages([HumanMessage("foo")], foo="bar") - assert ( - _client.return_value.beta.messages.count_tokens.call_args.kwargs["foo"] == "bar" + assert _client.return_value.messages.count_tokens.call_args.kwargs["foo"] == "bar" + + llm = ChatAnthropic( + model="claude-sonnet-4-5-20250929", + betas=["context-management-2025-06-27"], + context_management={"edits": [{"type": "clear_tool_uses_20250919"}]}, ) + with patch.object(anthropic, "Client") as _client: + llm.get_num_tokens_from_messages([HumanMessage("foo")]) + + call_args = _client.return_value.beta.messages.count_tokens.call_args.kwargs + assert call_args["betas"] == ["context-management-2025-06-27"] + assert call_args["context_management"] == { + "edits": [{"type": "clear_tool_uses_20250919"}] + } def test_usage_metadata_standardization() -> None: @@ -1435,6 +1447,22 @@ def test_cache_control_kwarg() -> None: ] +def test_context_management_in_payload() -> None: + llm = ChatAnthropic( + model="claude-sonnet-4-5-20250929", # type: ignore[call-arg] + betas=["context-management-2025-06-27"], + context_management={"edits": [{"type": "clear_tool_uses_20250919"}]}, + ) + llm_with_tools = llm.bind_tools( + [{"type": "web_search_20250305", "name": "web_search"}] + ) + input_message = HumanMessage("Search for recent developments in AI") + payload = llm_with_tools._get_request_payload([input_message]) # type: 
ignore[attr-defined] + assert payload["context_management"] == { + "edits": [{"type": "clear_tool_uses_20250919"}] + } + + def test_anthropic_model_params() -> None: llm = ChatAnthropic(model="claude-3-5-haiku-latest") diff --git a/libs/partners/anthropic/uv.lock b/libs/partners/anthropic/uv.lock index f2b73eda553..e546e421184 100644 --- a/libs/partners/anthropic/uv.lock +++ b/libs/partners/anthropic/uv.lock @@ -21,7 +21,7 @@ wheels = [ [[package]] name = "anthropic" -version = "0.68.0" +version = "0.69.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -33,9 +33,9 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/46/da44bf087ddaf3f7dbe4808c00c7cde466fe68c4fc9fbebdfc231f4ea205/anthropic-0.68.0.tar.gz", hash = "sha256:507e9b5f627d1b249128ff15b21855e718fa4ed8dabc787d0e68860a4b32a7a8", size = 471584, upload-time = "2025-09-17T15:20:19.509Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c8/9d/9ad1778b95f15c5b04e7d328c1b5f558f1e893857b7c33cd288c19c0057a/anthropic-0.69.0.tar.gz", hash = "sha256:c604d287f4d73640f40bd2c0f3265a2eb6ce034217ead0608f6b07a8bc5ae5f2", size = 480622, upload-time = "2025-09-29T16:53:45.282Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/60/32/2d7553184b05bdbec61dd600014a55b9028408aee6128b25cb6f20e3002c/anthropic-0.68.0-py3-none-any.whl", hash = "sha256:ac579ea5eca22a7165b1042e6af57c4bf556e51afae3ca80e24768d4756b78c0", size = 325199, upload-time = "2025-09-17T15:20:17.452Z" }, + { url = "https://files.pythonhosted.org/packages/9b/38/75129688de5637eb5b383e5f2b1570a5cc3aecafa4de422da8eea4b90a6c/anthropic-0.69.0-py3-none-any.whl", hash = "sha256:1f73193040f33f11e27c2cd6ec25f24fe7c3f193dc1c5cde6b7a08b18a16bcc5", size = 337265, upload-time = "2025-09-29T16:53:43.686Z" }, ] [[package]] @@ -461,7 +461,7 @@ typing = [ [package.metadata] requires-dist = [ - { name = "anthropic", specifier = ">=0.67.0,<1.0.0" }, + { name = "anthropic", specifier = ">=0.69.0,<1.0.0" }, { name = "langchain-core", editable = "../../core" }, { name = "pydantic", specifier = ">=2.7.4,<3.0.0" }, ]
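Reviewer's note, not part of the patch: a minimal end-to-end sketch of the new `PlanningMiddleware`, pieced together from the class docstring and the unit tests above. The scripted `FakeToolCallingModel` and the explicit `.compile()` step mirror the test harness (the helper's import path is assumed from `test_middleware_agent.py`); a real agent would instead pass a provider model string as in the docstring example.

```python
from langchain_core.messages import HumanMessage

from langchain.agents.middleware.planning import PlanningMiddleware
from langchain.agents.middleware_agent import create_agent

# Test helper; location assumed from the relative import in test_middleware_agent.py.
from tests.unit_tests.agents.model import FakeToolCallingModel

# Scripted model: first turn calls write_todos, second turn ends with no tool calls.
model = FakeToolCallingModel(
    tool_calls=[
        [
            {
                "args": {"todos": [{"content": "Outline refactor", "status": "in_progress"}]},
                "name": "write_todos",
                "type": "tool_call",
                "id": "call_1",
            }
        ],
        [],
    ]
)

agent = create_agent(model=model, middleware=[PlanningMiddleware()])
agent = agent.compile()  # the unit tests compile the graph before invoking

result = agent.invoke({"messages": [HumanMessage("Help me refactor my codebase")]})
print(result["todos"])  # [{'content': 'Outline refactor', 'status': 'in_progress'}]
```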