Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-09-17 23:18:20 +00:00)
gpu out of memory
@@ -18,7 +18,7 @@ llm_model_config = {

 LLM_MODEL = "vicuna-13b"

-vicuna_model_server = "http://192.168.31.114:21000/"
+vicuna_model_server = "http://127.0.0.1:8000/"

 # Load model config
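For context on this change: the client now targets a model server on the local machine instead of the LAN host, a natural first step when chasing a GPU out-of-memory error on one box. Below is a minimal sketch of how a caller might use this setting, assuming a hypothetical "generate" route that accepts the PromptRequest fields shown further down; the repo's actual endpoint path may differ.

import requests

# Value from the config above; the "generate" path is an assumption
# for illustration, not confirmed by this diff.
VICUNA_MODEL_SERVER = "http://127.0.0.1:8000/"

def generate(prompt: str, temperature: float = 0.7, max_new_tokens: int = 256) -> dict:
    payload = {
        "prompt": prompt,
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "stop": None,
    }
    resp = requests.post(VICUNA_MODEL_SERVER + "generate", json=payload)
    resp.raise_for_status()
    return resp.json()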
@@ -10,7 +10,8 @@ def generate_output(model, tokenizer, params, device, context_len=2048):
     max_new_tokens = int(params.get("max_new_tokens", 256))
     stop_parameter = params.get("stop", None)
-    if stop_parameter == tokenizer.eso_token:
+    print(tokenizer.__dir__())
+    if stop_parameter == tokenizer.eos_token:
         stop_parameter = None
     stop_strings = []
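The fix swaps the misspelled attribute eso_token for the real Hugging Face tokenizer attribute eos_token; the old spelling would typically raise AttributeError on a standard transformers tokenizer, and the added print(tokenizer.__dir__()) is a quick way to list the attributes that actually exist. A minimal sketch of the stop handling this hunk feeds into follows; the str/list normalization and the truncation loop are assumptions about how stop_strings is consumed later in generate_output, not code taken from the repo.

def normalize_stop(stop_parameter, eos_token):
    # An explicit EOS stop adds nothing: generation already halts at EOS,
    # so treat it as "no extra stop strings" (what the diff's branch does).
    if stop_parameter == eos_token:
        stop_parameter = None
    stop_strings = []
    if isinstance(stop_parameter, str):
        stop_strings.append(stop_parameter)
    elif isinstance(stop_parameter, list):
        stop_strings = stop_parameter
    return stop_strings

def truncate_at_stop(text, stop_strings):
    # Cut the generated text at the first occurrence of any stop string.
    for s in stop_strings:
        idx = text.find(s)
        if idx != -1:
            text = text[:idx]
    return text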
@@ -18,7 +18,7 @@ class PromptRequest(BaseModel):
     prompt: str
     temperature: float
     max_new_tokens: int
-    stop: Optional(List[str]) = None
+    stop: Optional[List[str]] = None


 class EmbeddingRequest(BaseModel):
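The corrected annotation matters because typing.Optional is a special form that must be subscripted with square brackets; calling it like a function, as in Optional(List[str]), raises a TypeError the moment the class body is evaluated. A self-contained sketch of the fixed model, with made-up field values for demonstration:

from typing import List, Optional
from pydantic import BaseModel

class PromptRequest(BaseModel):
    prompt: str
    temperature: float
    max_new_tokens: int
    # Optional[...] is subscripted, never called: Optional(List[str])
    # fails at class-definition time with a TypeError.
    stop: Optional[List[str]] = None

req = PromptRequest(prompt="SELECT 1", temperature=0.7, max_new_tokens=64)
print(req.stop)  # None when the client sends no stop strings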