Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-10-26 04:09:22 +00:00)
fix merge problem
@@ -7,7 +7,6 @@ import torch
 import gradio as gr
 from fastchat.serve.inference import generate_stream, compress_module
-
 
 from transformers import AutoTokenizer, AutoModelForCausalLM
 device = "cuda" if torch.cuda.is_available() else "cpu"
 BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b"
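For context, the hunk above sits just before the model-loading code that the diff only shows in its next hunk header (`model = AutoModelForCausalLM.from_pretrained(`). Below is a minimal sketch of that setup, assuming the standard transformers loading pattern; the tokenizer call and the from_pretrained keyword arguments are assumptions and are not part of this commit:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"
BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b"

# Assumed loading pattern; the commit only shows the opening line of from_pretrained().
tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODE,
    low_cpu_mem_usage=True,     # assumption: keeps peak host RAM down for a 13B checkpoint
    torch_dtype=torch.float16,  # assumption: half precision so the model fits on one GPU
)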
@@ -21,12 +20,12 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 
 def generate(prompt):
-    # compress_module(model, device)
-    # model.to(device)
+    compress_module(model, device)
+    model.to(device)
     print(model, tokenizer)
     params = {
         "model": "vicuna-13b",
-        "prompt": prompt,
+        "prompt": "这是一个用户与助手之间的对话, 助手精通数据库领域的知识, 并能够对数据库领域知识做出非常专业的回答。以下是用户的问题:" + prompt,
         "temperature": 0.7,
         "max_new_tokens": 512,
         "stop": "###"
@@ -36,9 +35,6 @@ def generate(prompt):
 
     for chunk in output:
         yield chunk
-    #for chunk in output.iter_lines(decode_unicode=False, delimiter=b"\0"):
-    #    if chunk:
-    #        yield chunk
 
 if __name__ == "__main__":
     with gr.Blocks() as demo:
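Taken together, the two hunks above leave generate() as a streaming generator: it compresses and places the model on every call, prefixes the user input with a Chinese system prompt (roughly: "This is a conversation between a user and an assistant; the assistant is proficient in database knowledge and can give very professional answers to database questions. The user's question follows:"), and yields chunks of the response. A minimal sketch of the resulting function is shown below; the generate_stream(...) call that produces `output` is an assumption, since the diff does not show how `output` is created:

from fastchat.serve.inference import generate_stream, compress_module

def generate(prompt):
    # Assumes the module-level model, tokenizer and device set up earlier in the file.
    # Per this commit, 8-bit weight compression and device placement now run on every
    # call (these two lines were previously commented out).
    compress_module(model, device)
    model.to(device)

    params = {
        "model": "vicuna-13b",
        "prompt": "这是一个用户与助手之间的对话, 助手精通数据库领域的知识, 并能够对数据库领域知识做出非常专业的回答。以下是用户的问题:" + prompt,
        "temperature": 0.7,
        "max_new_tokens": 512,
        "stop": "###",
    }

    # Assumption: FastChat's generate_stream yields partial completions for these
    # params; the exact call is not visible in this diff.
    output = generate_stream(model, tokenizer, params, device)
    for chunk in output:
        yield chunk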
@@ -53,5 +49,3 @@ if __name__ == "__main__":
 
     demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
 
-
-
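The launch line above implies a small Gradio Blocks UI wrapped around the streaming generate() function. The commit does not show the widgets inside the `with gr.Blocks()` block, so the following is only a plausible sketch with hypothetical component names (prompt_box, output_box, submit_btn):

import gradio as gr

if __name__ == "__main__":
    with gr.Blocks() as demo:
        # Hypothetical layout; the actual components are not part of this diff.
        prompt_box = gr.Textbox(label="Question")
        output_box = gr.Textbox(label="Answer")
        submit_btn = gr.Button("Submit")
        # Binding a generator function streams partial answers into output_box.
        submit_btn.click(fn=generate, inputs=prompt_box, outputs=output_box)

    # queue() is required for generator (streaming) callbacks in Gradio 3.x;
    # concurrency_count=3 lets up to three requests run in parallel, and
    # server_name="0.0.0.0" exposes the app on all network interfaces.
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")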