mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-24 20:09:01 +00:00
fix: LLM mimicking Unicode responses due to forced Unicode conversion of non-ASCII characters. (#32222)
fix: Fix LLM mimicking Unicode responses due to forced Unicode conversion of non-ASCII characters. - **Description:** This PR fixes an issue where the LLM would mimic Unicode-escaped responses because non-ASCII characters in tool calls were forcibly converted to `\uXXXX` escape sequences. The fix disables the `ensure_ascii` flag in `json.dumps()` when converting tool calls to OpenAI format. - **Issue:** Fixes the behavior shown below. input: ```json {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': 'call_nv9trcehdpihr21zj9po19vq', 'function': {'name': 'create_customer', 'arguments': '{"customer_name": "你好啊集团"}'}}]} ``` output: ```json {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': 'call_nv9trcehdpihr21zj9po19vq', 'function': {'name': 'create_customer', 'arguments': '{"customer_name": "\\u4f60\\u597d\\u554a\\u96c6\\u56e2"}'}}]} ``` As a result, the LLM sees the escaped form in its message history and starts emitting Unicode escape sequences itself. Each escaped character takes many more symbols than the original character, which can lengthen LLM responses and lead to slower performance. <img width="686" height="277" alt="image" src="https://github.com/user-attachments/assets/28f3b007-3964-4455-bee2-68f86ac1906d" /> --------- Co-authored-by: Mason Daugherty <github@mdrxy.com> Co-authored-by: Mason Daugherty <mason@langchain.dev>
This commit is contained in:
@@ -68,6 +68,9 @@ class ChatParrotLink(BaseChatModel):
|
||||
"""
|
||||
# Replace this with actual logic to generate a response from a list
|
||||
# of messages.
|
||||
_ = stop # Mark as used to avoid unused variable warning
|
||||
_ = run_manager # Mark as used to avoid unused variable warning
|
||||
_ = kwargs # Mark as used to avoid unused variable warning
|
||||
last_message = messages[-1]
|
||||
tokens = last_message.content[: self.parrot_buffer_length]
|
||||
ct_input_tokens = sum(len(message.content) for message in messages)
|
||||
@@ -114,6 +117,8 @@ class ChatParrotLink(BaseChatModel):
|
||||
downstream and understand why generation stopped.
|
||||
run_manager: A run manager with callbacks for the LLM.
|
||||
"""
|
||||
_ = stop # Mark as used to avoid unused variable warning
|
||||
_ = kwargs # Mark as used to avoid unused variable warning
|
||||
last_message = messages[-1]
|
||||
tokens = str(last_message.content[: self.parrot_buffer_length])
|
||||
ct_input_tokens = sum(len(message.content) for message in messages)
|
||||
|
@@ -1,5 +1,10 @@
|
||||
"""Test the standard tests on the custom chat model in the docs."""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
|
||||
from langchain_tests.integration_tests import ChatModelIntegrationTests
|
||||
from langchain_tests.unit_tests import ChatModelUnitTests
|
||||
|
||||
@@ -24,3 +29,12 @@ class TestChatParrotLinkIntegration(ChatModelIntegrationTests):
|
||||
@property
|
||||
def chat_model_params(self) -> dict:
|
||||
return {"model": "bird-brain-001", "temperature": 0, "parrot_buffer_length": 50}
|
||||
|
||||
@pytest.mark.xfail(reason="ChatParrotLink doesn't implement bind_tools method")
|
||||
def test_unicode_tool_call_integration(
|
||||
self,
|
||||
model: BaseChatModel,
|
||||
tool_choice: Optional[str] = None,
|
||||
force_tool_call: bool = True,
|
||||
) -> None:
|
||||
"""Expected failure as ChatParrotLink doesn't support tool calling yet."""
|
||||
|
Reference in New Issue
Block a user