diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py
index 407d11127..01d05837b 100644
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -93,8 +93,8 @@ class ChatGLMAdapater(BaseLLMAdaper):
             model = (
                 AutoModel.from_pretrained(
                     model_path, trust_remote_code=True, **from_pretrained_kwargs
                 )
-            .half()
-            .cuda()
+                .half()
+                .cuda()
             )
             return model, tokenizer
diff --git a/pilot/server/llmserver.py b/pilot/server/llmserver.py
index 66180a406..30653a16e 100644
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -73,7 +73,9 @@ class ModelWorker:
             for output in self.generate_stream_func(
                 self.model, self.tokenizer, params, DEVICE, CFG.MAX_POSITION_EMBEDDINGS
             ):
-                # 生产请不要打开输出!gpt4all线程与父进程共享stdout, 打开会影响前端输出
+                # Please do not open the output in production!
+                # The gpt4all thread shares stdout with the parent process,
+                # and opening it may affect the frontend output.
                 # print("output: ", output)
                 ret = {
                     "text": output,