update anthropic

This commit is contained in:
Chester Curme 2025-04-08 18:29:17 -04:00
parent cbd05c66de
commit b1fc20cbcd
2 changed files with 119 additions and 0 deletions

View File

@ -42,6 +42,7 @@ from langchain_core.messages import (
SystemMessage, SystemMessage,
ToolCall, ToolCall,
ToolMessage, ToolMessage,
is_data_content_block,
) )
from langchain_core.messages.ai import InputTokenDetails, UsageMetadata from langchain_core.messages.ai import InputTokenDetails, UsageMetadata
from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
@ -184,6 +185,73 @@ def _merge_messages(
return merged return merged
def _format_data_content_block(block: dict) -> dict:
"""Format standard data content block to format expected by Anthropic."""
if block["type"] == "image":
if block["source_type"] == "url":
if block["source"].startswith("data:"):
# Data URI
formatted_block = {
"type": "image",
"source": _format_image(block["source"]),
}
else:
formatted_block = {
"type": "image",
"source": {"type": "url", "url": block["source"]},
}
elif block["source_type"] == "base64":
formatted_block = {
"type": "image",
"source": {
"type": "base64",
"media_type": block["mime_type"],
"data": block["source"],
},
}
else:
raise ValueError(
"Anthropic only supports 'url' and 'base64' source_type for image "
"content blocks."
)
elif block["type"] == "file":
if block["source_type"] == "url":
formatted_block = {
"type": "document",
"source": {
"type": "url",
"url": block["source"],
},
}
elif block["source_type"] == "base64":
formatted_block = {
"type": "document",
"source": {
"type": "base64",
"media_type": block.get("mime_type") or "application/pdf",
"data": block["source"],
},
}
elif block["source_type"] == "text":
formatted_block = {
"type": "document",
"source": {
"type": "text",
"media_type": block.get("mime_type") or "text/plain",
"data": block["source"],
},
}
else:
raise ValueError(f"Block of type {block['type']} is not supported.")
if formatted_block and (metadata := block.get("metadata")):
formatted_block = {**formatted_block, **metadata}
return formatted_block
def _format_messages( def _format_messages(
messages: List[BaseMessage], messages: List[BaseMessage],
) -> Tuple[Union[str, List[Dict], None], List[Dict]]: ) -> Tuple[Union[str, List[Dict], None], List[Dict]]:
@ -240,6 +308,8 @@ def _format_messages(
# convert format # convert format
source = _format_image(block["image_url"]["url"]) source = _format_image(block["image_url"]["url"])
content.append({"type": "image", "source": source}) content.append({"type": "image", "source": source})
elif is_data_content_block(block):
content.append(_format_data_content_block(block))
elif block["type"] == "tool_use": elif block["type"] == "tool_use":
# If a tool_call with the same id as a tool_use content block # If a tool_call with the same id as a tool_use content block
# exists, the tool_call is preferred. # exists, the tool_call is preferred.

View File

@ -663,6 +663,34 @@ def test_pdf_document_input() -> None:
assert isinstance(result.content, str) assert isinstance(result.content, str)
assert len(result.content) > 0 assert len(result.content) > 0
# Test cache control with standard format
result = ChatAnthropic(model=IMAGE_MODEL_NAME).invoke(
[
HumanMessage(
[
{
"type": "text",
"text": "Summarize this document:",
},
{
"type": "file",
"source_type": "base64",
"mime_type": "application/pdf",
"source": data,
"metadata": {"cache_control": {"type": "ephemeral"}},
},
]
)
]
)
assert isinstance(result, AIMessage)
assert isinstance(result.content, str)
assert len(result.content) > 0
assert result.usage_metadata is not None
cache_creation = result.usage_metadata["input_token_details"]["cache_creation"]
cache_read = result.usage_metadata["input_token_details"]["cache_read"]
assert cache_creation > 0 or cache_read > 0
def test_citations() -> None: def test_citations() -> None:
llm = ChatAnthropic(model="claude-3-5-haiku-latest") llm = ChatAnthropic(model="claude-3-5-haiku-latest")
@ -699,6 +727,27 @@ def test_citations() -> None:
assert any("citations" in block for block in full.content) assert any("citations" in block for block in full.content)
assert not any("citation" in block for block in full.content) assert not any("citation" in block for block in full.content)
# Test standard format
messages = [
{
"role": "user",
"content": [
{
"type": "file",
"source_type": "text",
"source": "The grass is green. The sky is blue.",
"mime_type": "text/plain",
"metadata": {"citations": {"enabled": True}},
},
{"type": "text", "text": "What color is the grass and sky?"},
],
}
]
response = llm.invoke(messages)
assert isinstance(response, AIMessage)
assert isinstance(response.content, list)
assert any("citations" in block for block in response.content)
def test_thinking() -> None: def test_thinking() -> None:
llm = ChatAnthropic( llm = ChatAnthropic(