fix: LLM mimicking Unicode responses due to forced Unicode conversion of non-ASCII characters. (#32222)

fix: Fix LLM mimicking Unicode responses due to forced Unicode conversion of non-ASCII characters. - **Description:** This PR fixes an issue where the LLM would mimic Unicode responses due to forced Unicode conversion of non-ASCII characters in tool calls. The fix involves disabling the `ensure_ascii` flag in `json.dumps()` when converting tool calls to OpenAI format. - **Issue:** Fixes ↓↓↓ input： ```json {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': 'call_nv9trcehdpihr21zj9po19vq', 'function': {'name': 'create_customer', 'arguments': '{"customer_name": "你好啊集团"}'}}]} ``` output: ```json {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': 'call_nv9trcehdpihr21zj9po19vq', 'function': {'name': 'create_customer', 'arguments': '{"customer_name": "\\u4f60\\u597d\\u554a\\u96c6\\u56e2"}'}}]} ``` then: llm will mimic outputting unicode. Unicode's vast number of symbols can lengthen LLM responses, leading to slower performance. <img width="686" height="277" alt="image" src="https://github.com/user-attachments/assets/28f3b007-3964-4455-bee2-68f86ac1906d" /> --------- Co-authored-by: Mason Daugherty <github@mdrxy.com> Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-09-24 20:09:01 +00:00 · 2025-07-25 05:01:31 +08:00
parent d53ebf367e
commit 0d6f915442
30 changed files with 1963 additions and 1794 deletions
--- a/libs/standard-tests/tests/unit_tests/custom_chat_model.py
+++ b/libs/standard-tests/tests/unit_tests/custom_chat_model.py
@@ -68,6 +68,9 @@ class ChatParrotLink(BaseChatModel):
        """
        # Replace this with actual logic to generate a response from a list
        # of messages.
+        _ = stop  # Mark as used to avoid unused variable warning
+        _ = run_manager  # Mark as used to avoid unused variable warning
+        _ = kwargs  # Mark as used to avoid unused variable warning
        last_message = messages[-1]
        tokens = last_message.content[: self.parrot_buffer_length]
        ct_input_tokens = sum(len(message.content) for message in messages)
@@ -114,6 +117,8 @@ class ChatParrotLink(BaseChatModel):
                  downstream and understand why generation stopped.
            run_manager: A run manager with callbacks for the LLM.
        """
+        _ = stop  # Mark as used to avoid unused variable warning
+        _ = kwargs  # Mark as used to avoid unused variable warning
        last_message = messages[-1]
        tokens = str(last_message.content[: self.parrot_buffer_length])
        ct_input_tokens = sum(len(message.content) for message in messages)
--- a/libs/standard-tests/tests/unit_tests/test_custom_chat_model.py
+++ b/libs/standard-tests/tests/unit_tests/test_custom_chat_model.py
@@ -1,5 +1,10 @@
 """Test the standard tests on the custom chat model in the docs."""

+from typing import Optional
+
+import pytest
+from langchain_core.language_models.chat_models import BaseChatModel
+
 from langchain_tests.integration_tests import ChatModelIntegrationTests
 from langchain_tests.unit_tests import ChatModelUnitTests

@@ -24,3 +29,12 @@ class TestChatParrotLinkIntegration(ChatModelIntegrationTests):
    @property
    def chat_model_params(self) -> dict:
        return {"model": "bird-brain-001", "temperature": 0, "parrot_buffer_length": 50}
+
+    @pytest.mark.xfail(reason="ChatParrotLink doesn't implement bind_tools method")
+    def test_unicode_tool_call_integration(
+        self,
+        model: BaseChatModel,
+        tool_choice: Optional[str] = None,
+        force_tool_call: bool = True,
+    ) -> None:
+        """Expected failure as ChatParrotLink doesn't support tool calling yet."""