From 577ed68b59b1516395088d9c4cbbe470514dfd00 Mon Sep 17 00:00:00 2001 From: Mohammad Mohtashim <45242107+keenborder786@users.noreply.github.com> Date: Tue, 28 May 2024 02:16:52 +0500 Subject: [PATCH] mistralai[patch]: Added Json Mode for ChatMistralAI (#22213) - **Description:** Powered [ChatMistralAI.with_structured_output](https://github.com/langchain-ai/langchain/blob/fbfed65fb1ccff3eb8477c4f114450537a0510b2/libs/partners/mistralai/langchain_mistralai/chat_models.py#L609) via json mode - **Issue:** #22081 --------- Co-authored-by: Bagatur --- docs/scripts/model_feat_table.py | 1 + .../langchain_mistralai/chat_models.py | 94 ++++++++++++++++--- 2 files changed, 84 insertions(+), 11 deletions(-) diff --git a/docs/scripts/model_feat_table.py b/docs/scripts/model_feat_table.py index ebdbaa3e9be..3b047e55097 100644 --- a/docs/scripts/model_feat_table.py +++ b/docs/scripts/model_feat_table.py @@ -24,6 +24,7 @@ CHAT_MODEL_FEAT_TABLE = { "ChatMistralAI": { "tool_calling": True, "structured_output": True, + "json_mode": True, "package": "langchain-mistralai", "link": "/docs/integrations/chat/mistralai/", }, diff --git a/libs/partners/mistralai/langchain_mistralai/chat_models.py b/libs/partners/mistralai/langchain_mistralai/chat_models.py index 2a18876a0f7..271941def89 100644 --- a/libs/partners/mistralai/langchain_mistralai/chat_models.py +++ b/libs/partners/mistralai/langchain_mistralai/chat_models.py @@ -12,6 +12,7 @@ from typing import ( Dict, Iterator, List, + Literal, Optional, Sequence, Tuple, @@ -49,6 +50,10 @@ from langchain_core.messages import ( ToolCall, ToolMessage, ) +from langchain_core.output_parsers import ( + JsonOutputParser, + PydanticOutputParser, +) from langchain_core.output_parsers.base import OutputParserLike from langchain_core.output_parsers.openai_tools import ( JsonOutputKeyToolsParser, @@ -608,8 +613,9 @@ class ChatMistralAI(BaseChatModel): def with_structured_output( self, - schema: Union[Dict, Type[BaseModel]], + schema: 
Optional[Union[Dict, Type[BaseModel]]] = None, *, + method: Literal["function_calling", "json_mode"] = "function_calling", include_raw: bool = False, **kwargs: Any, ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]: @@ -622,6 +628,12 @@ class ChatMistralAI(BaseChatModel): attributes will be validated, whereas with a dict they will not be. If `method` is "function_calling" and `schema` is a dict, then the dict must match the OpenAI function-calling spec. + method: The method for steering model generation, either "function_calling" + or "json_mode". If "function_calling" then the schema will be converted + to an OpenAI function and the returned model will make use of the + function-calling API. If "json_mode" then Mistral's JSON mode will be + used. Note that if using "json_mode" then you must include instructions + for formatting the output into the desired schema in the model call. include_raw: If False then only the parsed structured output is returned. If an error occurs during model output parsing it will be raised. If True then both the raw model response (a BaseMessage) and the parsed model @@ -709,21 +721,81 @@ class ChatMistralAI(BaseChatModel): # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' # } + Example: JSON mode, Pydantic schema (method="json_mode", include_raw=True): + .. code-block:: + + from langchain_mistralai import ChatMistralAI + from langchain_core.pydantic_v1 import BaseModel + + class AnswerWithJustification(BaseModel): + answer: str + justification: str + + llm = ChatMistralAI(model="mistral-large-latest", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, + method="json_mode", + include_raw=True + ) + + structured_llm.invoke( + "Answer the following question. 
" + "Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n" + "What's heavier a pound of bricks or a pound of feathers?" + ) + # -> { + # 'raw': AIMessage(content='{\n "answer": "They are both the same weight.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \n}'), + # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'), + # 'parsing_error': None + # } + + Example: JSON mode, no schema (schema=None, method="json_mode", include_raw=True): + .. code-block:: + + from langchain_mistralai import ChatMistralAI + + structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) + + structured_llm.invoke( + "Answer the following question. " + "Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n" + "What's heavier a pound of bricks or a pound of feathers?" + ) + # -> { + # 'raw': AIMessage(content='{\n "answer": "They are both the same weight.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \n}'), + # 'parsed': { + # 'answer': 'They are both the same weight.', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.' 
+ # }, + # 'parsing_error': None + # } """ # noqa: E501 if kwargs: raise ValueError(f"Received unsupported arguments {kwargs}") is_pydantic_schema = isinstance(schema, type) and issubclass(schema, BaseModel) - llm = self.bind_tools([schema], tool_choice="any") - if is_pydantic_schema: - output_parser: OutputParserLike = PydanticToolsParser( - tools=[schema], first_tool_only=True + if method == "function_calling": + if schema is None: + raise ValueError( + "schema must be specified when method is 'function_calling'. " + "Received None." + ) + llm = self.bind_tools([schema], tool_choice="any") + if is_pydantic_schema: + output_parser: OutputParserLike = PydanticToolsParser( + tools=[schema], first_tool_only=True + ) + else: + key_name = convert_to_openai_tool(schema)["function"]["name"] + output_parser = JsonOutputKeyToolsParser( + key_name=key_name, first_tool_only=True + ) + elif method == "json_mode": + llm = self.bind(response_format={"type": "json_object"}) + output_parser = ( + PydanticOutputParser(pydantic_object=schema) + if is_pydantic_schema + else JsonOutputParser() ) - else: - key_name = convert_to_openai_tool(schema)["function"]["name"] - output_parser = JsonOutputKeyToolsParser( - key_name=key_name, first_tool_only=True - ) - if include_raw: parser_assign = RunnablePassthrough.assign( parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None