From 3048a9a26daa8c08a9a79543018708eb23f72c3f Mon Sep 17 00:00:00 2001
From: like
Date: Wed, 11 Dec 2024 05:13:26 +0800
Subject: [PATCH] community: tongyi multimodal response format fix to support
 langchain (#28645)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Description: The multimodal (tongyi) response format
"message": {"role": "assistant", "content": [{"text": "图像"}]}}]}
is not compatible with LangChain.

Dependencies: No

---------

Co-authored-by: Erick Friis
---
 .../langchain_community/chat_models/tongyi.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/libs/community/langchain_community/chat_models/tongyi.py b/libs/community/langchain_community/chat_models/tongyi.py
index a39e744531a..acb099b18b8 100644
--- a/libs/community/langchain_community/chat_models/tongyi.py
+++ b/libs/community/langchain_community/chat_models/tongyi.py
@@ -547,6 +547,19 @@ class ChatTongyi(BaseChatModel):
                 if _kwargs.get("stream") and not _kwargs.get(
                     "incremental_output", False
                 ):
+                    # inline fix response text logic
+                    resp_copy = json.loads(json.dumps(resp))
+                    if resp_copy.get("output") and resp_copy["output"].get("choices"):
+                        choice = resp_copy["output"]["choices"][0]
+                        message = choice["message"]
+                        if isinstance(message.get("content"), list):
+                            content_text = "".join(
+                                item.get("text", "")
+                                for item in message["content"]
+                                if isinstance(item, dict)
+                            )
+                            message["content"] = content_text
+                        resp = resp_copy
                     if prev_resp is None:
                         delta_resp = resp
                     else:
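
For illustration only (not part of the patch), below is a minimal standalone sketch of the flattening that the hunk above performs on each streamed Tongyi response, assuming a Tongyi-style response dict; the helper name flatten_multimodal_content is hypothetical and not a library API.

    # Hypothetical standalone sketch of the patch's content-flattening logic.
    import json


    def flatten_multimodal_content(resp: dict) -> dict:
        """Collapse a list-of-dicts "content" field into a plain string."""
        # Deep-copy via a JSON round trip, as the patch does, so the
        # original response object is left untouched.
        resp_copy = json.loads(json.dumps(resp))
        if resp_copy.get("output") and resp_copy["output"].get("choices"):
            message = resp_copy["output"]["choices"][0]["message"]
            if isinstance(message.get("content"), list):
                message["content"] = "".join(
                    item.get("text", "")
                    for item in message["content"]
                    if isinstance(item, dict)
                )
        return resp_copy


    # The multimodal shape quoted in the commit message becomes a plain
    # string, which is the format LangChain's chat message handling expects.
    sample = {
        "output": {
            "choices": [
                {"message": {"role": "assistant", "content": [{"text": "图像"}]}}
            ]
        }
    }
    print(
        flatten_multimodal_content(sample)["output"]["choices"][0]["message"]["content"]
    )
    # -> 图像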