diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py
index 407d11127..01d05837b 100644
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -93,8 +93,8 @@ class ChatGLMAdapater(BaseLLMAdaper):
             model = (
                 AutoModel.from_pretrained(
                     model_path, trust_remote_code=True, **from_pretrained_kwargs
                 )
-            .half()
-            .cuda()
+                .half()
+                .cuda()
             )
             return model, tokenizer
diff --git a/pilot/server/llmserver.py b/pilot/server/llmserver.py
index 66180a406..30653a16e 100644
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -73,7 +73,9 @@ class ModelWorker:
             for output in self.generate_stream_func(
                 self.model, self.tokenizer, params, DEVICE, CFG.MAX_POSITION_EMBEDDINGS
             ):
-                # 生产请不要打开输出!gpt4all线程与父进程共享stdout, 打开会影响前端输出
+                # Please do not open the output in production!
+                # The gpt4all thread shares stdout with the parent process,
+                # and opening it may affect the frontend output.
                 # print("output: ", output)
                 ret = {
                     "text": output,