Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-09-16 22:51:24 +00:00)

Commit: add token size
@@ -27,7 +27,7 @@ LLM_MODEL_CONFIG = {
 VECTOR_SEARCH_TOP_K = 3
 LLM_MODEL = "vicuna-13b"
 LIMIT_MODEL_CONCURRENCY = 5
-MAX_POSITION_EMBEDDINGS = 2048
+MAX_POSITION_EMBEDDINGS = 4096
 VICUNA_MODEL_SERVER = "http://192.168.31.114:8000"


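This hunk doubles the configured context window from 2048 to 4096 positions, matching the 4096-token Vicuna variants. As a minimal sketch of how such a constant is typically consumed before inference, assuming a hypothetical truncate_prompt helper that is not taken from the repository:

    # Hypothetical helper: keep the prompt within the position-embedding
    # budget, reserving room for the tokens the model will generate.
    MAX_POSITION_EMBEDDINGS = 4096  # value set by this commit

    def truncate_prompt(tokenizer, prompt: str, max_new_tokens: int) -> list:
        input_ids = tokenizer(prompt).input_ids
        budget = MAX_POSITION_EMBEDDINGS - max_new_tokens - 8  # small safety margin
        # Drop tokens from the left so the most recent context survives.
        return input_ids[-budget:] if len(input_ids) > budget else input_ids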
@@ -5,13 +5,13 @@ import torch


 @torch.inference_mode()
 def generate_stream(model, tokenizer, params, device,
-                    context_len=2048, stream_interval=2):
+                    context_len=4096, stream_interval=2):
     """Fork from fastchat: https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/inference.py """
     prompt = params["prompt"]
     l_prompt = len(prompt)
     temperature = float(params.get("temperature", 1.0))
-    max_new_tokens = int(params.get("max_new_tokens", 256))
+    max_new_tokens = int(params.get("max_new_tokens", 2048))
     stop_str = params.get("stop", None)

     input_ids = tokenizer(prompt).input_ids
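The same doubling is applied to generate_stream(): the default context_len becomes 4096 and the default max_new_tokens becomes 2048. In the FastChat code this function is forked from, the two values bound each other roughly as follows (a paraphrased sketch, not the exact upstream lines):

    # Paraphrased from the FastChat-style generation loop: the prompt is
    # clipped so that prompt tokens plus generated tokens fit in context_len.
    context_len = 4096                               # new default in this commit
    max_new_tokens = 2048                            # new default in this commit
    max_src_len = context_len - max_new_tokens - 8   # room left for the prompt
    # input_ids = input_ids[-max_src_len:]           # keep the rightmost prompt tokens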
@@ -301,8 +301,8 @@ def build_single_model_ui():

         max_output_tokens = gr.Slider(
             minimum=0,
-            maximum=1024,
-            value=512,
+            maximum=4096,
+            value=2048,
             step=64,
             interactive=True,
             label="最大输出Token数",
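The Gradio slider labeled 最大输出Token数 ("maximum output tokens") is widened to the same budget, so the UI can actually request the new 2048-token default. A sketch of how a slider value usually reaches the generation parameters; the handler name and payload shape below are assumptions, not the repository's exact wiring:

    # Hypothetical handler: the slider value becomes params["max_new_tokens"],
    # the key generate_stream() reads with its new 2048 default.
    def on_generate(prompt: str, max_output_tokens: int) -> dict:
        return {"prompt": prompt, "max_new_tokens": int(max_output_tokens)}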
@@ -51,4 +51,5 @@ wandb
 llama-index==0.5.27
 pymysql
 unstructured==0.6.3
 pytesseract==0.3.10
+chromadb
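Finally, chromadb is added (unpinned) to the requirements, presumably as the vector store queried with VECTOR_SEARCH_TOP_K. A minimal usage sketch of the new dependency; the collection name and documents are illustrative only:

    import chromadb

    client = chromadb.Client()                             # in-memory client
    collection = client.create_collection("db_gpt_docs")   # illustrative name
    collection.add(documents=["DB-GPT stores embeddings here"], ids=["doc-1"])
    print(collection.query(query_texts=["embeddings"], n_results=1))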