llm: add chatglm

This commit is contained in:
csunny 2023-05-20 15:45:43 +08:00
parent b466f4afdb
commit c0532246af
3 changed files with 41 additions and 7 deletions

View File

@@ -69,10 +69,6 @@ class ChatGLMAdapater(BaseLLMAdaper):
        ).half().cuda()
        return model, tokenizer

class ZiYaLLaMaAdapter(BaseLLMAdaper):
    # TODO
    pass

class CodeGenAdapter(BaseLLMAdaper):
    pass
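The hunk above shows only the tail of the ChatGLM adapter's loader. For context, a minimal sketch of how a ChatGLM checkpoint and tokenizer are commonly instantiated with Hugging Face transformers; the standalone helper and its model_path argument are assumptions for illustration, not part of this commit:

# Sketch only: typical ChatGLM loading via transformers.
# `load_chatglm` and `model_path` are hypothetical; the adapter class in this
# repo wraps equivalent logic.
from transformers import AutoModel, AutoTokenizer


def load_chatglm(model_path: str):
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = (
        AutoModel.from_pretrained(model_path, trust_remote_code=True)
        .half()  # fp16 weights, matching the .half() call in the diff above
        .cuda()  # move to GPU, matching the .cuda() call in the diff above
    )
    return model, tokenizer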

View File

@@ -1,3 +0,0 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,41 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import torch
@torch.inference_mode()
def chatglm_generate_stream(model, tokenizer, params, device, context_len=2048, stream_interval=2):
"""Generate text using chatglm model's chat api """
messages = params["prompt"]
max_new_tokens = int(params.get("max_new_tokens", 256))
temperature = float(params.get("temperature", 1.0))
top_p = float(params.get("top_p", 1.0))
echo = params.get("echo", True)
generate_kwargs = {
"max_new_tokens": max_new_tokens,
"do_sample": True if temperature > 1e-5 else False,
"top_p": top_p,
"logits_processor": None
}
if temperature > 1e-5:
generate_kwargs["temperature"] = temperature
hist = []
for i in range(0, len(messages) - 2, 2):
hist.append(messages[i][1], messages[i + 1][1])
query = messages[-2][1]
output = ""
i = 0
for i, (response, new_hist) in enumerate(model.stream_chat(tokenizer, query, hist, **generate_kwargs)):
if echo:
output = query + " " + response
else:
output = response
yield output
yield output
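A hypothetical usage sketch follows. The shape of params["prompt"] is an assumption inferred from how the function indexes messages: a list of (role, text) pairs whose second-to-last entry is the current user query; the model and tokenizer would come from the ChatGLM adapter shown earlier.

# Assumed calling convention: "prompt" is a list of (role, text) tuples,
# ending with the current user turn plus an empty assistant slot.
params = {
    "prompt": [
        ("user", "What is ChatGLM?"),
        ("assistant", "ChatGLM is an open bilingual dialogue model."),
        ("user", "Summarize that in one sentence."),
        ("assistant", ""),  # placeholder for the reply being generated
    ],
    "temperature": 0.7,
    "max_new_tokens": 128,
    "echo": False,
}

# model and tokenizer as returned by the adapter's loader.
for partial in chatglm_generate_stream(model, tokenizer, params, device="cuda"):
    print(partial)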