fix: deprecate setattr on ModelCallRequest (#34022)

* One alternative considered was setting `frozen=True` on the dataclass,
but that would be a breaking change, so deprecating attribute assignment is the gentler approach.
This commit is contained in:
Sydney Runkle
2025-11-19 11:08:55 -05:00
committed by GitHub
parent 328ba36601
commit b7d1831f9d
16 changed files with 866 additions and 611 deletions

View File

@@ -94,14 +94,6 @@ class AnthropicPromptCachingMiddleware(AgentMiddleware):
)
return messages_count >= self.min_messages_to_cache
def _apply_cache_control(self, request: ModelRequest) -> None:
"""Apply cache control settings to the request.
Args:
request: The model request to modify.
"""
request.model_settings["cache_control"] = {"type": self.type, "ttl": self.ttl}
def wrap_model_call(
self,
request: ModelRequest,
@@ -119,8 +111,12 @@ class AnthropicPromptCachingMiddleware(AgentMiddleware):
if not self._should_apply_caching(request):
return handler(request)
self._apply_cache_control(request)
return handler(request)
model_settings = request.model_settings
new_model_settings = {
**model_settings,
"cache_control": {"type": self.type, "ttl": self.ttl},
}
return handler(request.override(model_settings=new_model_settings))
async def awrap_model_call(
self,
@@ -139,5 +135,9 @@ class AnthropicPromptCachingMiddleware(AgentMiddleware):
if not self._should_apply_caching(request):
return await handler(request)
self._apply_cache_control(request)
return await handler(request)
model_settings = request.model_settings
new_model_settings = {
**model_settings,
"cache_control": {"type": self.type, "ttl": self.ttl},
}
return await handler(request.override(model_settings=new_model_settings))

View File

@@ -82,12 +82,17 @@ def test_anthropic_prompt_caching_middleware_initialization() -> None:
model_settings={},
)
modified_request: ModelRequest | None = None
def mock_handler(req: ModelRequest) -> ModelResponse:
nonlocal modified_request
modified_request = req
return ModelResponse(result=[AIMessage(content="mock response")])
middleware.wrap_model_call(fake_request, mock_handler)
# Check that model_settings were passed through via the request
assert fake_request.model_settings == {
assert modified_request is not None
assert modified_request.model_settings == {
"cache_control": {"type": "ephemeral", "ttl": "5m"}
}
@@ -162,13 +167,18 @@ async def test_anthropic_prompt_caching_middleware_async() -> None:
model_settings={},
)
modified_request: ModelRequest | None = None
async def mock_handler(req: ModelRequest) -> ModelResponse:
nonlocal modified_request
modified_request = req
return ModelResponse(result=[AIMessage(content="mock response")])
result = await middleware.awrap_model_call(fake_request, mock_handler)
assert isinstance(result, ModelResponse)
# Check that model_settings were passed through via the request
assert fake_request.model_settings == {
assert modified_request is not None
assert modified_request.model_settings == {
"cache_control": {"type": "ephemeral", "ttl": "1h"}
}
@@ -237,13 +247,18 @@ async def test_anthropic_prompt_caching_middleware_async_min_messages() -> None:
model_settings={},
)
modified_request: ModelRequest | None = None
async def mock_handler(req: ModelRequest) -> ModelResponse:
nonlocal modified_request
modified_request = req
return ModelResponse(result=[AIMessage(content="mock response")])
result = await middleware.awrap_model_call(fake_request, mock_handler)
assert isinstance(result, ModelResponse)
# Cache control should NOT be added when message count is below minimum
assert fake_request.model_settings == {}
assert modified_request is not None
assert modified_request.model_settings == {}
async def test_anthropic_prompt_caching_middleware_async_with_system_prompt() -> None:
@@ -268,13 +283,18 @@ async def test_anthropic_prompt_caching_middleware_async_with_system_prompt() ->
model_settings={},
)
modified_request: ModelRequest | None = None
async def mock_handler(req: ModelRequest) -> ModelResponse:
nonlocal modified_request
modified_request = req
return ModelResponse(result=[AIMessage(content="mock response")])
result = await middleware.awrap_model_call(fake_request, mock_handler)
assert isinstance(result, ModelResponse)
# Cache control should be added when system prompt pushes count to minimum
assert fake_request.model_settings == {
assert modified_request is not None
assert modified_request.model_settings == {
"cache_control": {"type": "ephemeral", "ttl": "1h"}
}
@@ -300,12 +320,17 @@ async def test_anthropic_prompt_caching_middleware_async_default_values() -> Non
model_settings={},
)
modified_request: ModelRequest | None = None
async def mock_handler(req: ModelRequest) -> ModelResponse:
nonlocal modified_request
modified_request = req
return ModelResponse(result=[AIMessage(content="mock response")])
result = await middleware.awrap_model_call(fake_request, mock_handler)
assert isinstance(result, ModelResponse)
# Check that model_settings were added with default values
assert fake_request.model_settings == {
assert modified_request is not None
assert modified_request.model_settings == {
"cache_control": {"type": "ephemeral", "ttl": "5m"}
}

File diff suppressed because it is too large Load Diff