diff --git a/pilot/openapi/api_v1/api_v1.py b/pilot/openapi/api_v1/api_v1.py
index 812d532fd..8415b29ef 100644
--- a/pilot/openapi/api_v1/api_v1.py
+++ b/pilot/openapi/api_v1/api_v1.py
@@ -281,7 +281,7 @@ async def stream_generator(chat):
                 )
                 msg = msg.replace("\n", "\\n")
                 yield f"data:{msg}\n\n"
-                await asyncio.sleep(0.1)
+                await asyncio.sleep(0.02)
     else:
         for chunk in model_response:
             if chunk:
@@ -291,7 +291,7 @@ async def stream_generator(chat):
 
                 msg = msg.replace("\n", "\\n")
                 yield f"data:{msg}\n\n"
-                await asyncio.sleep(0.1)
+                await asyncio.sleep(0.02)
 
     chat.current_message.add_ai_message(msg)
     chat.current_message.add_view_message(msg)
diff --git a/pilot/scene/base_chat.py b/pilot/scene/base_chat.py
index ef7dd4b02..01586a701 100644
--- a/pilot/scene/base_chat.py
+++ b/pilot/scene/base_chat.py
@@ -172,7 +172,8 @@ class BaseChat(ABC):
                 from pilot.server.llmserver import worker
                 output = worker.generate_stream_gate(payload)
                 for rsp in output:
-                    rsp_str = str(rsp, "utf-8")
+                    rsp = rsp.replace(b"\0", b"")
+                    rsp_str = rsp.decode()
                     print("[TEST: output]:", rsp_str)
 
             ### output parse