llms: add cpu support

2025-09-26 12:04:39 +00:00 · 2023-05-21 16:05:53 +08:00
parent f52c7523b5
commit 89970bd71c
3 changed files with 16 additions and 5 deletions
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -9,6 +9,8 @@ from transformers import (
    AutoModel
 )
 from pilot.configs.model_config import DEVICE
 class BaseLLMAdaper:
    """The Base class for multi model, in our project.
    We will support those model, which performance resemble ChatGPT  """
@@ -64,10 +66,17 @@ class ChatGLMAdapater(BaseLLMAdaper):
    def loader(self, model_path: str, from_pretrained_kwargs: dict):
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-        model = AutoModel.from_pretrained(
-            model_path, trust_remote_code=True, **from_pretrained_kwargs
-        ).half().cuda()
-        return model, tokenizer
+
+        if DEVICE != "cuda":
+            model = AutoModel.from_pretrained(
+                model_path, trust_remote_code=True, **from_pretrained_kwargs
+            ).float()
+            return model, tokenizer
+        else:
+            model = AutoModel.from_pretrained(
+                model_path, trust_remote_code=True, **from_pretrained_kwargs
+            ).half().cuda()
+            return model, tokenizer
 class CodeGenAdapter(BaseLLMAdaper):
    pass
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -155,6 +155,7 @@ if __name__ == "__main__":
    model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
    print(model_path, DEVICE)
    worker = ModelWorker(
        model_path=model_path, 
        model_name=CFG.LLM_MODEL, 
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,6 +42,7 @@ tenacity==8.2.2
 peft
 pycocoevalcap
 sentence-transformers
 cpm_kernels
 umap-learn
 notebook
 gradio==3.23