diff --git a/pilot/openapi/api_v1/api_v1.py b/pilot/openapi/api_v1/api_v1.py index 812d532fd..8415b29ef 100644 --- a/pilot/openapi/api_v1/api_v1.py +++ b/pilot/openapi/api_v1/api_v1.py @@ -281,7 +281,7 @@ async def stream_generator(chat): ) msg = msg.replace("\n", "\\n") yield f"data:{msg}\n\n" - await asyncio.sleep(0.1) + await asyncio.sleep(0.02) else: for chunk in model_response: if chunk: @@ -291,7 +291,7 @@ async def stream_generator(chat): msg = msg.replace("\n", "\\n") yield f"data:{msg}\n\n" - await asyncio.sleep(0.1) + await asyncio.sleep(0.02) chat.current_message.add_ai_message(msg) chat.current_message.add_view_message(msg) diff --git a/pilot/scene/base_chat.py b/pilot/scene/base_chat.py index ef7dd4b02..01586a701 100644 --- a/pilot/scene/base_chat.py +++ b/pilot/scene/base_chat.py @@ -172,7 +172,8 @@ class BaseChat(ABC): from pilot.server.llmserver import worker output = worker.generate_stream_gate(payload) for rsp in output: - rsp_str = str(rsp, "utf-8") + rsp = rsp.replace(b"\0", b"") + rsp_str = rsp.decode() print("[TEST: output]:", rsp_str) ### output parse