Mirror of https://github.com/hwchase17/langchain.git, synced 2025-09-16 15:04:13 +00:00
community[minor]: add chat model llamacpp (#22589)
- **PR title**: [community] add chat model llamacpp
- **PR message**:
  - **Description:** This PR introduces a new chat model integration with llamacpp_python, designed to work similarly to the existing ChatOpenAI model.
    + Works well with instructed chat, chains, and function/tool calling.
    + Works with LangGraph (persistent memory, tool calling); will be updated soon.
  - **Dependencies:** This change requires the llamacpp_python library to be installed.

@baskaryan

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
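For orientation, a minimal usage sketch (not part of the commit itself): it assumes llama-cpp-python is installed and a local GGUF model file is available; the model path and the `get_weather` tool below are illustrative placeholders.

```python
import multiprocessing

from langchain_community.chat_models import ChatLlamaCpp
from langchain_core.tools import tool

llm = ChatLlamaCpp(
    model_path="./local-model.Q8_0.gguf",  # placeholder path to a local GGUF file
    temperature=0.5,
    max_tokens=512,
    n_ctx=10000,
    n_threads=multiprocessing.cpu_count() - 1,
)

# Plain chat invocation, like any other LangChain chat model.
print(llm.invoke("Name three uses for a llama.").content)


@tool
def get_weather(city: str) -> str:
    """Return a (fake) weather report for the given city."""
    return f"It is sunny in {city}."


# Tool calling: tool_choice forces the model to call the bound tool.
llm_with_tools = llm.bind_tools(
    [get_weather],
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
)
print(llm_with_tools.invoke("What is the weather in Paris?").tool_calls)
```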
This commit is contained in:

libs/community/langchain_community/chat_models/llamacpp.py (new file, 811 lines)
@@ -0,0 +1,811 @@
import json
from operator import itemgetter
from pathlib import Path
from typing import (
    Any,
    Callable,
    Dict,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Type,
    Union,
    cast,
)

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import (
    BaseChatModel,
    generate_from_stream,
)
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    BaseMessageChunk,
    ChatMessage,
    ChatMessageChunk,
    FunctionMessage,
    FunctionMessageChunk,
    HumanMessage,
    HumanMessageChunk,
    SystemMessage,
    SystemMessageChunk,
    ToolMessage,
    ToolMessageChunk,
)
from langchain_core.messages.tool import InvalidToolCall, ToolCall, ToolCallChunk
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.output_parsers.openai_tools import (
    JsonOutputKeyToolsParser,
    PydanticToolsParser,
    make_invalid_tool_call,
    parse_tool_call,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
from langchain_core.tools import BaseTool
from langchain_core.utils.function_calling import convert_to_openai_tool


class ChatLlamaCpp(BaseChatModel):
    """llama.cpp model.

    To use, you should have the llama-cpp-python library installed, and provide the
    path to the Llama model as a named parameter to the constructor.
    Check out: https://github.com/abetlen/llama-cpp-python

    """
    client: Any  #: :meta private:

    model_path: str
    """The path to the Llama model file."""

    lora_base: Optional[str] = None
    """The path to the Llama LoRA base model."""

    lora_path: Optional[str] = None
    """The path to the Llama LoRA. If None, no LoRA is loaded."""

    n_ctx: int = 512
    """Token context window."""

    n_parts: int = -1
    """Number of parts to split the model into.
    If -1, the number of parts is automatically determined."""

    seed: int = -1
    """Seed. If -1, a random seed is used."""

    f16_kv: bool = True
    """Use half-precision for key/value cache."""

    logits_all: bool = False
    """Return logits for all tokens, not just the last token."""

    vocab_only: bool = False
    """Only load the vocabulary, no weights."""

    use_mlock: bool = False
    """Force system to keep model in RAM."""

    n_threads: Optional[int] = None
    """Number of threads to use.
    If None, the number of threads is automatically determined."""

    n_batch: int = 8
    """Number of tokens to process in parallel.
    Should be a number between 1 and n_ctx."""

    n_gpu_layers: Optional[int] = None
    """Number of layers to be loaded into GPU memory. Default None."""

    suffix: Optional[str] = None
    """A suffix to append to the generated text. If None, no suffix is appended."""

    max_tokens: int = 256
    """The maximum number of tokens to generate."""

    temperature: float = 0.8
    """The temperature to use for sampling."""

    top_p: float = 0.95
    """The top-p value to use for sampling."""

    logprobs: Optional[int] = None
    """The number of logprobs to return. If None, no logprobs are returned."""

    echo: bool = False
    """Whether to echo the prompt."""

    stop: Optional[List[str]] = None
    """A list of strings to stop generation when encountered."""

    repeat_penalty: float = 1.1
    """The penalty to apply to repeated tokens."""

    top_k: int = 40
    """The top-k value to use for sampling."""

    last_n_tokens_size: int = 64
    """The number of tokens to look back when applying the repeat_penalty."""

    use_mmap: bool = True
    """Whether to keep the model loaded in RAM."""

    rope_freq_scale: float = 1.0
    """Scale factor for rope sampling."""

    rope_freq_base: float = 10000.0
    """Base frequency for rope sampling."""

    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Any additional parameters to pass to llama_cpp.Llama."""

    streaming: bool = True
    """Whether to stream the results, token by token."""

    grammar_path: Optional[Union[str, Path]] = None
    """
    grammar_path: Path to the .gbnf file that defines formal grammars
    for constraining model outputs. For instance, the grammar can be used
    to force the model to generate valid JSON or to speak exclusively in emojis. At most
    one of grammar_path and grammar should be passed in.
    """
    grammar: Any = None
    """
    grammar: formal grammar for constraining model outputs. For instance, the grammar
    can be used to force the model to generate valid JSON or to speak exclusively in
    emojis. At most one of grammar_path and grammar should be passed in.
    """
    verbose: bool = True
    """Print verbose output to stderr."""

    @root_validator(pre=False, skip_on_failure=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that llama-cpp-python library is installed."""
        try:
            from llama_cpp import Llama, LlamaGrammar
        except ImportError:
            raise ImportError(
                "Could not import llama-cpp-python library. "
                "Please install the llama-cpp-python library to "
                "use this chat model: pip install llama-cpp-python"
            )

        model_path = values["model_path"]
        model_param_names = [
            "rope_freq_scale",
            "rope_freq_base",
            "lora_path",
            "lora_base",
            "n_ctx",
            "n_parts",
            "seed",
            "f16_kv",
            "logits_all",
            "vocab_only",
            "use_mlock",
            "n_threads",
            "n_batch",
            "use_mmap",
            "last_n_tokens_size",
            "verbose",
        ]
        model_params = {k: values[k] for k in model_param_names}
        # For backwards compatibility, only include if non-null.
        if values["n_gpu_layers"] is not None:
            model_params["n_gpu_layers"] = values["n_gpu_layers"]

        model_params.update(values["model_kwargs"])

        try:
            values["client"] = Llama(model_path, **model_params)
        except Exception as e:
            raise ValueError(
                f"Could not load Llama model from path: {model_path}. "
                f"Received error {e}"
            )

        if values["grammar"] and values["grammar_path"]:
            grammar = values["grammar"]
            grammar_path = values["grammar_path"]
            raise ValueError(
                "Can only pass in one of grammar and grammar_path. Received "
                f"{grammar=} and {grammar_path=}."
            )
        elif isinstance(values["grammar"], str):
            values["grammar"] = LlamaGrammar.from_string(values["grammar"])
        elif values["grammar_path"]:
            values["grammar"] = LlamaGrammar.from_file(values["grammar_path"])
        else:
            pass
        return values

    def _get_parameters(self, stop: Optional[List[str]]) -> Dict[str, Any]:
        """
        Perform a sanity check, preparing parameters in the format needed by llama_cpp.

        Returns:
            Dictionary containing the combined parameters.
        """

        params = self._default_params

        # llama_cpp expects the "stop" key, not "stop_sequences", so we remove it:
        stop_sequences = params.pop("stop_sequences")

        # then set it as configured, or default to an empty list:
        params["stop"] = stop or stop_sequences or self.stop or []

        return params

    def _create_message_dicts(
        self, messages: List[BaseMessage]
    ) -> List[Dict[str, Any]]:
        message_dicts = [_convert_message_to_dict(m) for m in messages]

        return message_dicts

    def _create_chat_result(self, response: dict) -> ChatResult:
        generations = []
        for res in response["choices"]:
            message = _convert_dict_to_message(res["message"])
            generation_info = dict(finish_reason=res.get("finish_reason"))
            if "logprobs" in res:
                generation_info["logprobs"] = res["logprobs"]
            gen = ChatGeneration(message=message, generation_info=generation_info)
            generations.append(gen)
        token_usage = response.get("usage", {})
        llm_output = {
            "token_usage": token_usage,
            # "system_fingerprint": response.get("system_fingerprint", ""),
        }
        return ChatResult(generations=generations, llm_output=llm_output)

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        params = {**self._get_parameters(stop), **kwargs}

        # If tool_choice is set, skip streaming and run a single tool-calling
        # completion instead.
        if self.streaming and not params.get("tool_choice"):
            stream_iter = self._stream(messages, run_manager=run_manager, **kwargs)
            return generate_from_stream(stream_iter)

        message_dicts = self._create_message_dicts(messages)

        response = self.client.create_chat_completion(messages=message_dicts, **params)

        return self._create_chat_result(response)

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        params = {**self._get_parameters(stop), **kwargs}
        message_dicts = self._create_message_dicts(messages)

        result = self.client.create_chat_completion(
            messages=message_dicts, stream=True, **params
        )

        default_chunk_class = AIMessageChunk
        count = 0
        for chunk in result:
            count += 1
            if not isinstance(chunk, dict):
                chunk = chunk.model_dump()
            if len(chunk["choices"]) == 0:
                continue
            choice = chunk["choices"][0]
            if choice["delta"] is None:
                continue
            chunk = _convert_delta_to_message_chunk(
                choice["delta"], default_chunk_class
            )
            generation_info = {}
            if finish_reason := choice.get("finish_reason"):
                generation_info["finish_reason"] = finish_reason
            logprobs = choice.get("logprobs")
            if logprobs:
                generation_info["logprobs"] = logprobs
            default_chunk_class = chunk.__class__
            chunk = ChatGenerationChunk(
                message=chunk, generation_info=generation_info or None
            )
            if run_manager:
                run_manager.on_llm_new_token(chunk.text, chunk=chunk, logprobs=logprobs)
            yield chunk

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
        *,
        tool_choice: Optional[Union[Dict[str, Dict], bool, str]] = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        """Bind tool-like objects to this chat model.

        tool_choice: does not currently support "any" or "auto" choices as in the
            OpenAI tool-calling API. To force a specific tool, pass a dict of the
            form {"type": "function", "function": {"name": <<tool_name>>}}.
        """
        formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
        tool_names = [ft["function"]["name"] for ft in formatted_tools]
        if tool_choice:
            if isinstance(tool_choice, dict):
                if not any(
                    tool_choice["function"]["name"] == name for name in tool_names
                ):
                    raise ValueError(
                        f"Tool choice {tool_choice=} was specified, but the only "
                        f"provided tools were {tool_names}."
                    )
            elif isinstance(tool_choice, str):
                chosen = [
                    f for f in formatted_tools if f["function"]["name"] == tool_choice
                ]
                if not chosen:
                    raise ValueError(
                        f"Tool choice {tool_choice=} was specified, but the only "
                        f"provided tools were {tool_names}."
                    )
            elif isinstance(tool_choice, bool):
                if len(formatted_tools) > 1:
                    raise ValueError(
                        "tool_choice=True can only be specified when a single tool is "
                        f"passed in. Received {len(tools)} tools."
                    )
                tool_choice = formatted_tools[0]
            else:
                raise ValueError(
                    "Unrecognized tool_choice type. Expected dict having format like "
                    'this {"type": "function", "function": {"name": <<tool_name>>}}. '
                    f"Received: {tool_choice}"
                )

            kwargs["tool_choice"] = tool_choice
        formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
        return super().bind(tools=formatted_tools, **kwargs)
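    # Illustrative sketch (not part of this class): forcing a single bound tool,
    # where ``GetWeather`` stands in for a user-defined pydantic tool schema.
    #
    #     llm_with_tools = llm.bind_tools(
    #         [GetWeather],
    #         tool_choice={"type": "function", "function": {"name": "GetWeather"}},
    #     )
    #     llm_with_tools.invoke("What is the weather like in Ho Chi Minh City?")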
    def with_structured_output(
        self,
        schema: Optional[Union[Dict, Type[BaseModel]]] = None,
        *,
        include_raw: bool = False,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
        """Model wrapper that returns outputs formatted to match the given schema.

        Args:
            schema: The output schema as a dict or a Pydantic class. If a Pydantic class
                then the model output will be an object of that class. If a dict then
                the model output will be a dict. With a Pydantic class the returned
                attributes will be validated, whereas with a dict they will not be. If
                `method` is "function_calling" and `schema` is a dict, then the dict
                must match the OpenAI function-calling spec or be a valid JSON schema
                with top level 'title' and 'description' keys specified.
            include_raw: If False then only the parsed structured output is returned. If
                an error occurs during model output parsing it will be raised. If True
                then both the raw model response (a BaseMessage) and the parsed model
                response will be returned. If an error occurs during output parsing it
                will be caught and returned as well. The final output is always a dict
                with keys "raw", "parsed", and "parsing_error".
            kwargs: Any other args to bind to model, ``self.bind(..., **kwargs)``.

        Returns:
            A Runnable that takes any ChatModel input and returns as output:

                If include_raw is True then a dict with keys:
                    raw: BaseMessage
                    parsed: Optional[_DictOrPydantic]
                    parsing_error: Optional[BaseException]

                If include_raw is False then just _DictOrPydantic is returned,
                where _DictOrPydantic depends on the schema:

                If schema is a Pydantic class then _DictOrPydantic is the Pydantic
                    class.

                If schema is a dict then _DictOrPydantic is a dict.

        Example: Pydantic schema (include_raw=False):
            .. code-block:: python

                from langchain_community.chat_models import ChatLlamaCpp
                from langchain_core.pydantic_v1 import BaseModel

                class AnswerWithJustification(BaseModel):
                    '''An answer to the user question along with justification for the answer.'''
                    answer: str
                    justification: str

                llm = ChatLlamaCpp(
                    temperature=0.,
                    model_path="./SanctumAI-meta-llama-3-8b-instruct.Q8_0.gguf",
                    n_ctx=10000,
                    n_gpu_layers=4,
                    n_batch=200,
                    max_tokens=512,
                    n_threads=multiprocessing.cpu_count() - 1,
                    repeat_penalty=1.5,
                    top_p=0.5,
                    stop=["<|end_of_text|>", "<|eot_id|>"],
                )
                structured_llm = llm.with_structured_output(AnswerWithJustification)

                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")

                # -> AnswerWithJustification(
                #     answer='They weigh the same',
                #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
                # )

        Example: Pydantic schema (include_raw=True):
            .. code-block:: python

                from langchain_community.chat_models import ChatLlamaCpp
                from langchain_core.pydantic_v1 import BaseModel

                class AnswerWithJustification(BaseModel):
                    '''An answer to the user question along with justification for the answer.'''
                    answer: str
                    justification: str

                llm = ChatLlamaCpp(
                    temperature=0.,
                    model_path="./SanctumAI-meta-llama-3-8b-instruct.Q8_0.gguf",
                    n_ctx=10000,
                    n_gpu_layers=4,
                    n_batch=200,
                    max_tokens=512,
                    n_threads=multiprocessing.cpu_count() - 1,
                    repeat_penalty=1.5,
                    top_p=0.5,
                    stop=["<|end_of_text|>", "<|eot_id|>"],
                )
                structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True)

                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
                # -> {
                #     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
                #     'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
                #     'parsing_error': None
                # }

        Example: dict schema (include_raw=False):
            .. code-block:: python

                from langchain_community.chat_models import ChatLlamaCpp
                from langchain_core.pydantic_v1 import BaseModel
                from langchain_core.utils.function_calling import convert_to_openai_tool

                class AnswerWithJustification(BaseModel):
                    '''An answer to the user question along with justification for the answer.'''
                    answer: str
                    justification: str

                dict_schema = convert_to_openai_tool(AnswerWithJustification)
                llm = ChatLlamaCpp(
                    temperature=0.,
                    model_path="./SanctumAI-meta-llama-3-8b-instruct.Q8_0.gguf",
                    n_ctx=10000,
                    n_gpu_layers=4,
                    n_batch=200,
                    max_tokens=512,
                    n_threads=multiprocessing.cpu_count() - 1,
                    repeat_penalty=1.5,
                    top_p=0.5,
                    stop=["<|end_of_text|>", "<|eot_id|>"],
                )
                structured_llm = llm.with_structured_output(dict_schema)

                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
                # -> {
                #     'answer': 'They weigh the same',
                #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                # }

        """  # noqa: E501

        if kwargs:
            raise ValueError(f"Received unsupported arguments {kwargs}")
        is_pydantic_schema = isinstance(schema, type) and issubclass(schema, BaseModel)
        if schema is None:
            raise ValueError(
                "schema must be specified when method is 'function_calling'. "
                "Received None."
            )
        llm = self.bind_tools([schema], tool_choice=True)
        if is_pydantic_schema:
            output_parser: OutputParserLike = PydanticToolsParser(
                tools=[cast(Type, schema)], first_tool_only=True
            )
        else:
            key_name = convert_to_openai_tool(schema)["function"]["name"]
            output_parser = JsonOutputKeyToolsParser(
                key_name=key_name, first_tool_only=True
            )

        if include_raw:
            parser_assign = RunnablePassthrough.assign(
                parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
            )
            parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
            parser_with_fallback = parser_assign.with_fallbacks(
                [parser_none], exception_key="parsing_error"
            )
            return RunnableMap(raw=llm) | parser_with_fallback
        else:
            return llm | output_parser
    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters.

        This information is used by the LangChain callback system, which
        is used for tracing purposes and makes it possible to monitor LLMs.
        """
        return {
            # The model name allows users to specify custom token counting
            # rules in LLM monitoring applications (e.g., in LangSmith users
            # can provide per token pricing for their model and monitor
            # costs for the given LLM.)
            **{"model_path": self.model_path},
            **self._default_params,
        }

    @property
    def _llm_type(self) -> str:
        """Get the type of language model used by this chat model."""
        return "llama-cpp-python"

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling create_chat_completion."""
        params: Dict = {
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "top_k": self.top_k,
            "logprobs": self.logprobs,
            "stop_sequences": self.stop,  # key here is convention among LLM classes
            "repeat_penalty": self.repeat_penalty,
        }
        if self.grammar:
            params["grammar"] = self.grammar
        return params

def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
    return {
        "type": "function",
        "id": tool_call["id"],
        "function": {
            "name": tool_call["name"],
            "arguments": json.dumps(tool_call["args"]),
        },
    }


def _lc_invalid_tool_call_to_openai_tool_call(
    invalid_tool_call: InvalidToolCall,
) -> dict:
    return {
        "type": "function",
        "id": invalid_tool_call["id"],
        "function": {
            "name": invalid_tool_call["name"],
            "arguments": invalid_tool_call["args"],
        },
    }


def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
    """Convert a dictionary to a LangChain message.

    Args:
        _dict: The dictionary.

    Returns:
        The LangChain message.
    """
    role = _dict.get("role")
    name = _dict.get("name")
    id_ = _dict.get("id")
    if role == "user":
        return HumanMessage(content=_dict.get("content", ""), id=id_, name=name)
    elif role == "assistant":
        # Fix for azure
        # Also OpenAI returns None for tool invocations
        content = _dict.get("content", "") or ""
        additional_kwargs: Dict = {}
        if function_call := _dict.get("function_call"):
            additional_kwargs["function_call"] = dict(function_call)
        tool_calls = []
        invalid_tool_calls = []
        if raw_tool_calls := _dict.get("tool_calls"):
            additional_kwargs["tool_calls"] = raw_tool_calls
            for raw_tool_call in raw_tool_calls:
                try:
                    tc = parse_tool_call(raw_tool_call, return_id=True)
                except Exception as e:
                    invalid_tc = make_invalid_tool_call(raw_tool_call, str(e))
                    invalid_tool_calls.append(invalid_tc)
                else:
                    if not tc:
                        continue
                    else:
                        tool_calls.append(tc)
        return AIMessage(
            content=content,
            additional_kwargs=additional_kwargs,
            name=name,
            id=id_,
            tool_calls=tool_calls,  # type: ignore[arg-type]
            invalid_tool_calls=invalid_tool_calls,
        )
    elif role == "system":
        return SystemMessage(content=_dict.get("content", ""), name=name, id=id_)
    elif role == "function":
        return FunctionMessage(
            content=_dict.get("content", ""), name=cast(str, _dict.get("name")), id=id_
        )
    elif role == "tool":
        additional_kwargs = {}
        if "name" in _dict:
            additional_kwargs["name"] = _dict["name"]
        return ToolMessage(
            content=_dict.get("content", ""),
            tool_call_id=cast(str, _dict.get("tool_call_id")),
            additional_kwargs=additional_kwargs,
            name=name,
            id=id_,
        )
    else:
        return ChatMessage(
            content=_dict.get("content", ""), role=cast(str, role), id=id_
        )


def _format_message_content(content: Any) -> Any:
    """Format message content."""
    if content and isinstance(content, list):
        # Remove unexpected block types
        formatted_content = []
        for block in content:
            if (
                isinstance(block, dict)
                and "type" in block
                and block["type"] == "tool_use"
            ):
                continue
            else:
                formatted_content.append(block)
    else:
        formatted_content = content

    return formatted_content


def _convert_message_to_dict(message: BaseMessage) -> dict:
    """Convert a LangChain message to a dictionary.

    Args:
        message: The LangChain message.

    Returns:
        The dictionary.
    """
    message_dict: Dict[str, Any] = {
        "content": _format_message_content(message.content),
    }
    if (name := message.name or message.additional_kwargs.get("name")) is not None:
        message_dict["name"] = name

    # populate role and additional message data
    if isinstance(message, ChatMessage):
        message_dict["role"] = message.role
    elif isinstance(message, HumanMessage):
        message_dict["role"] = "user"
    elif isinstance(message, AIMessage):
        message_dict["role"] = "assistant"
        if "function_call" in message.additional_kwargs:
            message_dict["function_call"] = message.additional_kwargs["function_call"]
        if message.tool_calls or message.invalid_tool_calls:
            message_dict["tool_calls"] = [
                _lc_tool_call_to_openai_tool_call(tc) for tc in message.tool_calls
            ] + [
                _lc_invalid_tool_call_to_openai_tool_call(tc)
                for tc in message.invalid_tool_calls
            ]
        elif "tool_calls" in message.additional_kwargs:
            message_dict["tool_calls"] = message.additional_kwargs["tool_calls"]
            tool_call_supported_props = {"id", "type", "function"}
            message_dict["tool_calls"] = [
                {k: v for k, v in tool_call.items() if k in tool_call_supported_props}
                for tool_call in message_dict["tool_calls"]
            ]
        else:
            pass
        # If tool calls present, content null value should be None not empty string.
        if "function_call" in message_dict or "tool_calls" in message_dict:
            message_dict["content"] = message_dict["content"] or None
    elif isinstance(message, SystemMessage):
        message_dict["role"] = "system"
    elif isinstance(message, FunctionMessage):
        message_dict["role"] = "function"
    elif isinstance(message, ToolMessage):
        message_dict["role"] = "tool"
        message_dict["tool_call_id"] = message.tool_call_id

        supported_props = {"content", "role", "tool_call_id"}
        message_dict = {k: v for k, v in message_dict.items() if k in supported_props}
    else:
        raise TypeError(f"Got unknown type {message}")
    return message_dict


def _convert_delta_to_message_chunk(
    _dict: Mapping[str, Any], default_class: Type[BaseMessageChunk]
) -> BaseMessageChunk:
    id_ = _dict.get("id")
    role = cast(str, _dict.get("role"))
    content = cast(str, _dict.get("content") or "")
    additional_kwargs: Dict = {}
    if _dict.get("function_call"):
        function_call = dict(_dict["function_call"])
        if "name" in function_call and function_call["name"] is None:
            function_call["name"] = ""
        additional_kwargs["function_call"] = function_call
    tool_call_chunks = []
    if raw_tool_calls := _dict.get("tool_calls"):
        additional_kwargs["tool_calls"] = raw_tool_calls
        for rtc in raw_tool_calls:
            try:
                tool_call = ToolCallChunk(
                    name=rtc["function"].get("name"),
                    args=rtc["function"].get("arguments"),
                    id=rtc.get("id"),
                    index=rtc["index"],
                )
                tool_call_chunks.append(tool_call)
            except KeyError:
                pass

    if role == "user" or default_class == HumanMessageChunk:
        return HumanMessageChunk(content=content, id=id_)
    elif role == "assistant" or default_class == AIMessageChunk:
        return AIMessageChunk(
            content=content,
            additional_kwargs=additional_kwargs,
            id=id_,
            tool_call_chunks=tool_call_chunks,
        )
    elif role == "system" or default_class == SystemMessageChunk:
        return SystemMessageChunk(content=content, id=id_)
    elif role == "function" or default_class == FunctionMessageChunk:
        return FunctionMessageChunk(content=content, name=_dict["name"], id=id_)
    elif role == "tool" or default_class == ToolMessageChunk:
        return ToolMessageChunk(
            content=content, tool_call_id=_dict["tool_call_id"], id=id_
        )
    elif role or default_class == ChatMessageChunk:
        return ChatMessageChunk(content=content, role=role, id=id_)
    else:
        return default_class(content=content, id=id_)  # type: ignore