Compare commits

...

4 Commits

Author SHA1 Message Date
Bagatur
e7c2b41cab rfc: bind_tools(response_format) 2025-01-06 19:11:14 -05:00
Bagatur
87d8012ef6 fmt 2025-01-06 18:07:09 -05:00
Bagatur
47b386d28f wip 2025-01-06 16:18:31 -05:00
Bagatur
22863b8ac3 rfc: AIMessage.parsed 2025-01-06 16:11:20 -05:00
7 changed files with 246 additions and 32 deletions

View File

@@ -1238,17 +1238,19 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
llm = self.bind_tools([schema], tool_choice="any")
if isinstance(schema, type) and is_basemodel_subclass(schema):
output_parser: OutputParserLike = PydanticToolsParser(
tools=[cast(TypeBaseModel, schema)], first_tool_only=True
tools=[cast(TypeBaseModel, schema)],
first_tool_only=True,
return_message=True,
)
else:
key_name = convert_to_openai_tool(schema)["function"]["name"]
output_parser = JsonOutputKeyToolsParser(
key_name=key_name, first_tool_only=True
key_name=key_name, first_tool_only=True, return_message=True
)
if include_raw:
parser_assign = RunnablePassthrough.assign(
parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
)
raw=itemgetter("raw") | output_parser
).assign(parsed=(lambda x: x["raw"].parsed), parsing_error=lambda _: None)
parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
parser_with_fallback = parser_assign.with_fallbacks(
[parser_none], exception_key="parsing_error"

View File

@@ -2,7 +2,7 @@ import json
import operator
from typing import Any, Literal, Optional, Union, cast
from pydantic import model_validator
from pydantic import BaseModel, model_validator
from typing_extensions import NotRequired, Self, TypedDict
from langchain_core.messages.base import (
@@ -163,6 +163,8 @@ class AIMessage(BaseMessage):
This is a standard representation of token usage that is consistent across models.
"""
parsed: Optional[Union[dict, BaseModel]] = None
"""The auto-parsed message contents."""
type: Literal["ai"] = "ai"
"""The type of the message (used for deserialization). Defaults to "ai"."""
@@ -440,11 +442,20 @@ def add_ai_message_chunks(
else:
usage_metadata = None
# 'parsed' always represents an aggregation not an incremental value, so the last
# non-null value is kept.
parsed = None
for m in reversed([left, *others]):
if m.parsed is not None:
parsed = m.parsed
break
return left.__class__(
example=left.example,
content=content,
additional_kwargs=additional_kwargs,
tool_call_chunks=tool_call_chunks,
parsed=parsed,
response_metadata=response_metadata,
usage_metadata=usage_metadata,
id=left.id,

View File

@@ -9,6 +9,7 @@ from typing import (
Optional,
TypeVar,
Union,
cast,
)
from typing_extensions import override
@@ -65,6 +66,8 @@ class BaseGenerationOutputParser(
):
"""Base class to parse the output of an LLM call."""
return_message: bool = False
@property
@override
def InputType(self) -> Any:
@@ -75,9 +78,12 @@ class BaseGenerationOutputParser(
@override
def OutputType(self) -> type[T]:
"""Return the output type for the parser."""
# even though mypy complains this isn't valid,
# it is good enough for pydantic to build the schema from
return T # type: ignore[misc]
if self.return_message:
return cast(type[T], AnyMessage)
else:
# even though mypy complains this isn't valid,
# it is good enough for pydantic to build the schema from
return T # type: ignore[misc]
def invoke(
self,
@@ -86,7 +92,7 @@ class BaseGenerationOutputParser(
**kwargs: Any,
) -> T:
if isinstance(input, BaseMessage):
return self._call_with_config(
parsed = self._call_with_config(
lambda inner_input: self.parse_result(
[ChatGeneration(message=inner_input)]
),
@@ -94,6 +100,10 @@ class BaseGenerationOutputParser(
config,
run_type="parser",
)
if self.return_message:
return cast(T, input.model_copy(update={"parsed": parsed}))
else:
return parsed
else:
return self._call_with_config(
lambda inner_input: self.parse_result([Generation(text=inner_input)]),
@@ -109,7 +119,7 @@ class BaseGenerationOutputParser(
**kwargs: Optional[Any],
) -> T:
if isinstance(input, BaseMessage):
return await self._acall_with_config(
parsed = await self._acall_with_config(
lambda inner_input: self.aparse_result(
[ChatGeneration(message=inner_input)]
),
@@ -117,6 +127,10 @@ class BaseGenerationOutputParser(
config,
run_type="parser",
)
if self.return_message:
return cast(T, input.model_copy(update={"parsed": parsed}))
else:
return parsed
else:
return await self._acall_with_config(
lambda inner_input: self.aparse_result([Generation(text=inner_input)]),
@@ -155,6 +169,8 @@ class BaseOutputParser(
return "boolean_output_parser"
""" # noqa: E501
return_message: bool = False
@property
@override
def InputType(self) -> Any:
@@ -171,6 +187,9 @@ class BaseOutputParser(
Raises:
TypeError: If the class doesn't have an inferable OutputType.
"""
if self.return_message:
return cast(type[T], AnyMessage)
for base in self.__class__.mro():
if hasattr(base, "__pydantic_generic_metadata__"):
metadata = base.__pydantic_generic_metadata__
@@ -190,7 +209,7 @@ class BaseOutputParser(
**kwargs: Any,
) -> T:
if isinstance(input, BaseMessage):
return self._call_with_config(
parsed = self._call_with_config(
lambda inner_input: self.parse_result(
[ChatGeneration(message=inner_input)]
),
@@ -198,6 +217,10 @@ class BaseOutputParser(
config,
run_type="parser",
)
if self.return_message:
return cast(T, input.model_copy(update={"parsed": parsed}))
else:
return parsed
else:
return self._call_with_config(
lambda inner_input: self.parse_result([Generation(text=inner_input)]),
@@ -213,7 +236,7 @@ class BaseOutputParser(
**kwargs: Optional[Any],
) -> T:
if isinstance(input, BaseMessage):
return await self._acall_with_config(
parsed = await self._acall_with_config(
lambda inner_input: self.aparse_result(
[ChatGeneration(message=inner_input)]
),
@@ -221,6 +244,10 @@ class BaseOutputParser(
config,
run_type="parser",
)
if self.return_message:
return cast(T, input.model_copy(update={"parsed": parsed}))
else:
return parsed
else:
return await self._acall_with_config(
lambda inner_input: self.aparse_result([Generation(text=inner_input)]),

View File

@@ -77,6 +77,21 @@
'default': None,
'title': 'Name',
}),
'parsed': dict({
'anyOf': list([
dict({
'type': 'object',
}),
dict({
'$ref': '#/$defs/BaseModel',
}),
dict({
'type': 'null',
}),
]),
'default': None,
'title': 'Parsed',
}),
'response_metadata': dict({
'title': 'Response Metadata',
'type': 'object',
@@ -181,6 +196,21 @@
'default': None,
'title': 'Name',
}),
'parsed': dict({
'anyOf': list([
dict({
'type': 'object',
}),
dict({
'$ref': '#/$defs/BaseModel',
}),
dict({
'type': 'null',
}),
]),
'default': None,
'title': 'Parsed',
}),
'response_metadata': dict({
'title': 'Response Metadata',
'type': 'object',
@@ -227,6 +257,12 @@
'title': 'AIMessageChunk',
'type': 'object',
}),
'BaseModel': dict({
'properties': dict({
}),
'title': 'BaseModel',
'type': 'object',
}),
'ChatMessage': dict({
'additionalProperties': True,
'description': 'Message that can be assigned an arbitrary speaker (i.e. role).',
@@ -1507,6 +1543,21 @@
'default': None,
'title': 'Name',
}),
'parsed': dict({
'anyOf': list([
dict({
'type': 'object',
}),
dict({
'$ref': '#/$defs/BaseModel',
}),
dict({
'type': 'null',
}),
]),
'default': None,
'title': 'Parsed',
}),
'response_metadata': dict({
'title': 'Response Metadata',
'type': 'object',
@@ -1611,6 +1662,21 @@
'default': None,
'title': 'Name',
}),
'parsed': dict({
'anyOf': list([
dict({
'type': 'object',
}),
dict({
'$ref': '#/$defs/BaseModel',
}),
dict({
'type': 'null',
}),
]),
'default': None,
'title': 'Parsed',
}),
'response_metadata': dict({
'title': 'Response Metadata',
'type': 'object',
@@ -1657,6 +1723,12 @@
'title': 'AIMessageChunk',
'type': 'object',
}),
'BaseModel': dict({
'properties': dict({
}),
'title': 'BaseModel',
'type': 'object',
}),
'ChatMessage': dict({
'additionalProperties': True,
'description': 'Message that can be assigned an arbitrary speaker (i.e. role).',

View File

@@ -451,6 +451,21 @@
'default': None,
'title': 'Name',
}),
'parsed': dict({
'anyOf': list([
dict({
'type': 'object',
}),
dict({
'$ref': '#/$defs/BaseModel',
}),
dict({
'type': 'null',
}),
]),
'default': None,
'title': 'Parsed',
}),
'response_metadata': dict({
'title': 'Response Metadata',
'type': 'object',
@@ -555,6 +570,21 @@
'default': None,
'title': 'Name',
}),
'parsed': dict({
'anyOf': list([
dict({
'type': 'object',
}),
dict({
'$ref': '#/$defs/BaseModel',
}),
dict({
'type': 'null',
}),
]),
'default': None,
'title': 'Parsed',
}),
'response_metadata': dict({
'title': 'Response Metadata',
'type': 'object',
@@ -601,6 +631,12 @@
'title': 'AIMessageChunk',
'type': 'object',
}),
'BaseModel': dict({
'properties': dict({
}),
'title': 'BaseModel',
'type': 'object',
}),
'ChatMessage': dict({
'additionalProperties': True,
'description': 'Message that can be assigned an arbitrary speaker (i.e. role).',

View File

@@ -48,8 +48,16 @@ from langchain_core.output_parsers import (
JsonOutputKeyToolsParser,
PydanticToolsParser,
)
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.output_parsers.base import (
BaseGenerationOutputParser,
OutputParserLike,
)
from langchain_core.outputs import (
ChatGeneration,
ChatGenerationChunk,
ChatResult,
Generation,
)
from langchain_core.runnables import (
Runnable,
RunnableMap,
@@ -819,6 +827,7 @@ class ChatAnthropic(BaseChatModel):
tool_choice: Optional[
Union[Dict[str, str], Literal["any", "auto"], str]
] = None,
response_format: Optional[Union[dict, type]] = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
r"""Bind tool-like objects to this chat model.
@@ -954,8 +963,13 @@ class ChatAnthropic(BaseChatModel):
AIMessage(content=[{'text': 'To get the current weather in San Francisco, I can use the GetWeather function. Let me check that for you.', 'type': 'text'}, {'id': 'toolu_01HtVtY1qhMFdPprx42qU2eA', 'input': {'location': 'San Francisco, CA'}, 'name': 'GetWeather', 'type': 'tool_use'}], response_metadata={'id': 'msg_016RfWHrRvW6DAGCdwB6Ac64', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'tool_use', 'stop_sequence': None, 'usage': {'input_tokens': 171, 'output_tokens': 82, 'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 1470}}, id='run-88b1f825-dcb7-4277-ac27-53df55d22001-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'toolu_01HtVtY1qhMFdPprx42qU2eA', 'type': 'tool_call'}], usage_metadata={'input_tokens': 171, 'output_tokens': 82, 'total_tokens': 253})
""" # noqa: E501
if response_format:
tools.append(response_format)
formatted_tools = [convert_to_anthropic_tool(tool) for tool in tools]
if not tool_choice:
# If we have a response format, enforce that a tool is called.
if response_format and not tool_choice:
kwargs["tool_choice"] = {"type": "any"}
elif not tool_choice:
pass
elif isinstance(tool_choice, dict):
kwargs["tool_choice"] = tool_choice
@@ -968,7 +982,11 @@ class ChatAnthropic(BaseChatModel):
f"Unrecognized 'tool_choice' type {tool_choice=}. Expected dict, "
f"str, or None."
)
return self.bind(tools=formatted_tools, **kwargs)
llm = self.bind(tools=formatted_tools, **kwargs)
if response_format:
return llm | _ToolsToParsedMessage(response_format=response_format)
else:
return llm
def with_structured_output(
self,
@@ -1355,3 +1373,46 @@ def _create_usage_metadata(anthropic_usage: BaseModel) -> UsageMetadata:
**{k: v for k, v in input_token_details.items() if v is not None}
),
)
class _ToolsToParsedMessage(BaseGenerationOutputParser):
    """Lift the response-format tool call of an AI message into ``parsed``.

    The first tool call whose name equals the tool name derived from
    ``response_format`` is removed from the message's ``tool_calls`` (and from
    ``tool_call_chunks`` for message chunks) and its arguments are stored on
    ``message.parsed``. All other tool calls are left untouched.
    """

    response_format: Union[dict, type[BaseModel]]
    """Schema of the structured response.

    When this is a dict the raw tool-call arguments are stored as ``parsed``;
    otherwise it is called with the arguments as keyword arguments and the
    resulting object is stored.
    """

    model_config = ConfigDict(extra="forbid")

    def parse_result(
        self, result: List[Generation], *, partial: bool = False
    ) -> Any:
        """Extract the structured response from the first candidate generation.

        Args:
            result: A list of Generations to be parsed. Only the first entry
                is inspected and it must be a ``ChatGeneration``.
            partial: Accepted for interface compatibility; not used here.

        Returns:
            The message of the first generation. It is modified in place (not
            copied): ``parsed`` is set and the matching tool call is removed.
            If no tool call matches, the message is returned unchanged.

        Raises:
            ValueError: If ``result`` is empty or its first entry is not a
                ``ChatGeneration``.
        """
        if not result or not isinstance(result[0], ChatGeneration):
            msg = (
                "_ToolsToParsedMessage expects a non-empty list of "
                "ChatGeneration objects."
            )
            raise ValueError(msg)
        message = cast(AIMessage, result[0].message)

        # Resolve the tool name once instead of re-converting the schema for
        # every tool call in the loop.
        target_name = self._response_format_name
        matched = next(
            (tc for tc in message.tool_calls if tc["name"] == target_name),
            None,
        )
        if matched is None:
            # Nothing to lift. Returning early also avoids treating ``None``
            # as an id to drop, which would strip every tool call without an id.
            return message

        message.parsed = (
            matched["args"]
            if isinstance(self.response_format, dict)
            else self.response_format(**matched["args"])
        )

        drop_id = matched["id"]
        if drop_id is not None:
            message.tool_calls = [
                tc for tc in message.tool_calls if tc["id"] != drop_id
            ]
        else:
            # Without an id, remove only the matched call itself so unrelated
            # id-less tool calls survive.
            message.tool_calls = [
                tc for tc in message.tool_calls if tc is not matched
            ]

        if isinstance(message, AIMessageChunk):
            if drop_id is not None:
                message.tool_call_chunks = [
                    tc for tc in message.tool_call_chunks if tc["id"] != drop_id
                ]
            else:
                # Chunks are separate objects from tool calls, so fall back to
                # matching on the tool name when there is no id to compare.
                message.tool_call_chunks = [
                    tc
                    for tc in message.tool_call_chunks
                    if tc.get("name") != target_name
                ]
        return message

    @property
    def _response_format_name(self) -> str:
        """Tool name that ``convert_to_anthropic_tool`` assigns to the schema."""
        return convert_to_anthropic_tool(self.response_format)["name"]

View File

@@ -85,11 +85,7 @@ from langchain_core.utils.function_calling import (
convert_to_openai_function,
convert_to_openai_tool,
)
from langchain_core.utils.pydantic import (
PydanticBaseModel,
TypeBaseModel,
is_basemodel_subclass,
)
from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env
from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
from typing_extensions import Self
@@ -777,6 +773,8 @@ class BaseChatOpenAI(BaseChatModel):
):
message = response.choices[0].message # type: ignore[attr-defined]
if hasattr(message, "parsed"):
cast(AIMessage, generations[0].message).parsed = message.parsed
# For backwards compatibility.
generations[0].message.additional_kwargs["parsed"] = message.parsed
if hasattr(message, "refusal"):
generations[0].message.additional_kwargs["refusal"] = message.refusal
@@ -1095,6 +1093,7 @@ class BaseChatOpenAI(BaseChatModel):
Union[dict, str, Literal["auto", "none", "required", "any"], bool]
] = None,
strict: Optional[bool] = None,
response_format: Optional[_DictOrPydanticClass] = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
"""Bind tool-like objects to this chat model.
@@ -1165,6 +1164,11 @@ class BaseChatOpenAI(BaseChatModel):
f"Received: {tool_choice}"
)
kwargs["tool_choice"] = tool_choice
if response_format:
response_format = _convert_to_openai_response_format(
response_format, strict=strict
)
kwargs["response_format"] = response_format
return super().bind(tools=formatted_tools, **kwargs)
def with_structured_output(
@@ -1472,17 +1476,18 @@ class BaseChatOpenAI(BaseChatModel):
output_parser: Runnable = PydanticToolsParser(
tools=[schema], # type: ignore[list-item]
first_tool_only=True, # type: ignore[list-item]
return_message=True,
)
else:
output_parser = JsonOutputKeyToolsParser(
key_name=tool_name, first_tool_only=True
key_name=tool_name, first_tool_only=True, return_message=True
)
elif method == "json_mode":
llm = self.bind(response_format={"type": "json_object"})
output_parser = (
PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type]
PydanticOutputParser(pydantic_object=schema, return_message=True) # type: ignore[arg-type]
if is_pydantic_schema
else JsonOutputParser()
else JsonOutputParser(return_message=True)
)
elif method == "json_schema":
if schema is None:
@@ -1494,10 +1499,10 @@ class BaseChatOpenAI(BaseChatModel):
llm = self.bind(response_format=response_format)
if is_pydantic_schema:
output_parser = _oai_structured_outputs_parser.with_types(
output_type=cast(type, schema)
output_type=AIMessage
)
else:
output_parser = JsonOutputParser()
output_parser = JsonOutputParser(return_message=True)
else:
raise ValueError(
f"Unrecognized method argument. Expected one of 'function_calling' or "
@@ -1506,8 +1511,8 @@ class BaseChatOpenAI(BaseChatModel):
if include_raw:
parser_assign = RunnablePassthrough.assign(
parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
)
raw=itemgetter("raw") | output_parser
).assign(parsed=lambda x: x["raw"].parsed, parsing_error=lambda _: None)
parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
parser_with_fallback = parser_assign.with_fallbacks(
[parser_none], exception_key="parsing_error"
@@ -2229,15 +2234,15 @@ def _convert_to_openai_response_format(
@chain
def _oai_structured_outputs_parser(ai_msg: AIMessage) -> PydanticBaseModel:
if ai_msg.additional_kwargs.get("parsed"):
return ai_msg.additional_kwargs["parsed"]
def _oai_structured_outputs_parser(ai_msg: AIMessage) -> AIMessage:
if ai_msg.parsed is not None:
return ai_msg
elif ai_msg.additional_kwargs.get("refusal"):
raise OpenAIRefusalError(ai_msg.additional_kwargs["refusal"])
else:
raise ValueError(
"Structured Output response does not have a 'parsed' field nor a 'refusal' "
"field. Received message:\n\n{ai_msg}"
f"field. Received message:\n\n{ai_msg}"
)