Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-08-06 19:04:24 +00:00)
Commit 80dac77b7a ("gpu out of memory"), parent a0f44efba5

Changed: README.md (17 lines changed)
````diff
@@ -3,9 +3,20 @@ A Open Database-GPT Experiment
+DB-GPT is an experimental open-source application that builds upon the fastchat model and uses vicuna as its base model. Additionally, it looks like this application incorporates langchain and llama-index embedding knowledge to improve Database-QA capabilities.
-DB-GPT is an experimental open-source application, which based on the vicuna base model.
+
+Overall, it appears to be a sophisticated and innovative tool for working with databases. If you have any specific questions about how to use or implement DB-GPT in your work, please let me know and I'll do my best to assist you.
+
+# Install
+
+1. Run model server
+
+```
+cd pilot/server
+uvicorn vicuna_server:app --host 0.0.0.0
+```
+
+2. Run gradio webui
+Coming soon, please wait...
+
-## Featurs
+# Featurs
 - SQL-Generate
 - Database-QA Based Knowledge
 - SQL-diagnosis
````
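Once the model server from the new install steps is running, a short client call confirms it is serving. The sketch below is illustrative only: the `/generate` path and the JSON field names are assumptions based on the `PromptRequest` model in the final hunk of this commit, not something the README documents.

```python
# Hypothetical smoke test for the server started with
# `uvicorn vicuna_server:app --host 0.0.0.0`.
# ASSUMPTION: the app exposes POST /generate taking the PromptRequest
# fields (prompt, temperature, max_new_tokens, stop) shown further down.
import requests

resp = requests.post(
    "http://127.0.0.1:8000/generate",
    json={
        "prompt": "Write a SQL query that lists the ten largest tables.",
        "temperature": 0.7,
        "max_new_tokens": 256,
        "stop": None,
    },
    timeout=120,
)
print(resp.status_code, resp.text)
```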
```diff
@@ -18,7 +18,7 @@ llm_model_config = {
 LLM_MODEL = "vicuna-13b"
 
 
-vicuna_model_server = "http://192.168.31.114:21000/"
+vicuna_model_server = "http://127.0.0.1:8000/"
 
 
 # Load model config
```
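This hunk repoints the client at a local uvicorn instance: 8000 is uvicorn's default port, replacing the hard-coded LAN address. A minimal reachability check, assuming nothing about the server beyond the configured base URL (the config module's import path is not shown in this diff):

```python
# Sanity-check that the configured model server is listening.
# ASSUMPTION: vicuna_model_server holds the value set in the config above.
import requests

vicuna_model_server = "http://127.0.0.1:8000/"

try:
    # Any HTTP response, even a 404 for "/", means uvicorn is up.
    r = requests.get(vicuna_model_server, timeout=5)
    print("server reachable, status", r.status_code)
except requests.ConnectionError:
    print("no server listening at", vicuna_model_server)
```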
```diff
@@ -10,7 +10,8 @@ def generate_output(model, tokenizer, params, device, context_len=2048):
     max_new_tokens = int(params.get("max_new_tokens", 256))
     stop_parameter = params.get("stop", None)
 
-    if stop_parameter == tokenizer.eso_token:
+    print(tokenizer.__dir__())
+    if stop_parameter == tokenizer.eos_token:
         stop_parameter = None
 
     stop_strings = []
```
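The one-character fix matters: `eos_token` is the real attribute on HuggingFace-style tokenizers, while `eso_token` is a typo that would raise an AttributeError on every request (the added `print(tokenizer.__dir__())` call appears to be leftover debugging used to find the correct name). The intent of the branch, as a standalone hedged sketch:

```python
# Sketch of the corrected stop-parameter normalization, assuming a
# HuggingFace-style tokenizer exposing .eos_token (e.g. "</s>" for vicuna).
from typing import List, Optional, Union

def normalize_stop(stop_parameter: Optional[Union[str, List[str]]],
                   eos_token: str) -> Optional[Union[str, List[str]]]:
    # Generation already halts on the EOS token, so passing it as an
    # explicit stop string is redundant; treat it as "no extra stop".
    if stop_parameter == eos_token:
        return None
    return stop_parameter

assert normalize_stop("</s>", "</s>") is None
assert normalize_stop("\nSQL:", "</s>") == "\nSQL:"
```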
```diff
@@ -18,7 +18,7 @@ class PromptRequest(BaseModel):
     prompt: str
     temperature: float
     max_new_tokens: int
-    stop: Optional(List[str]) = None
+    stop: Optional[List[str]] = None
 
 
 class EmbeddingRequest(BaseModel):
```
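The last hunk fixes a syntax-level bug, not a style nit: `typing.Optional` is subscripted with brackets, and calling it like a function raises a TypeError as soon as the module is imported, before uvicorn can even bind the app. A self-contained version of the corrected model:

```python
# Corrected request model: Optional[...] is a subscripted typing form.
# Writing Optional(List[str]) calls it instead and raises a TypeError
# at import time, which would prevent the server from starting at all.
from typing import List, Optional
from pydantic import BaseModel

class PromptRequest(BaseModel):
    prompt: str
    temperature: float
    max_new_tokens: int
    stop: Optional[List[str]] = None

req = PromptRequest(prompt="hello", temperature=0.7, max_new_tokens=64)
print(req.stop)  # None unless the caller supplies stop strings
```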