mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-07-31 15:47:05 +00:00
support multi process to launch llmserver, add openai proxy api.
This commit is contained in:
parent
62eb6c383e
commit
6e3b48c7c4
@ -7,6 +7,10 @@
|
||||
## For example, to disable coding related features, uncomment the next line
|
||||
# DISABLED_COMMAND_CATEGORIES=
|
||||
|
||||
#*******************************************************************#
|
||||
#** Webserver Port **#
|
||||
#*******************************************************************#
|
||||
WEB_SERVER_PORT=7860
|
||||
|
||||
#*******************************************************************#
|
||||
#*** LLM PROVIDER ***#
|
||||
@ -17,6 +21,7 @@
|
||||
#*******************************************************************#
|
||||
#** LLM MODELS **#
|
||||
#*******************************************************************#
|
||||
# LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
|
||||
LLM_MODEL=vicuna-13b
|
||||
MODEL_SERVER=http://127.0.0.1:8000
|
||||
LIMIT_MODEL_CONCURRENCY=5
|
||||
@ -98,15 +103,20 @@ VECTOR_STORE_TYPE=Chroma
|
||||
#MILVUS_SECURE=
|
||||
|
||||
|
||||
#*******************************************************************#
|
||||
#** WebServer Language Support **#
|
||||
#*******************************************************************#
|
||||
LANGUAGE=en
|
||||
#LANGUAGE=zh
|
||||
|
||||
|
||||
#*******************************************************************#
|
||||
# ** PROXY_SERVER
|
||||
# ** PROXY_SERVER (OpenAI interface | ChatGPT proxy service): use ChatGPT as your LLM.
|
||||
# ** If your server can reach OpenAI directly, set PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
|
||||
# ** Otherwise, if you have a ChatGPT proxy server, set PROXY_SERVER_URL={your-proxy-serverip:port/xxx}
|
||||
#*******************************************************************#
|
||||
PROXY_API_KEY=
|
||||
PROXY_SERVER_URL=http://127.0.0.1:3000/proxy_address
|
||||
PROXY_API_KEY={your-openai-sk}
|
||||
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
|
||||
|
||||
|
||||
#*******************************************************************#
|
||||
|
@ -17,8 +17,9 @@ class Config(metaclass=Singleton):
|
||||
def __init__(self) -> None:
|
||||
"""Initialize the Config class"""
|
||||
|
||||
# Gradio language version: en, cn
|
||||
# Gradio language version: en, zh
|
||||
self.LANGUAGE = os.getenv("LANGUAGE", "en")
|
||||
self.WEB_SERVER_PORT = int(os.getenv("WEB_SERVER_PORT", 7860))
|
||||
|
||||
self.debug_mode = False
|
||||
self.skip_reprompt = False
|
||||
|
@ -84,6 +84,11 @@ class ModelWorker:
|
||||
return get_embeddings(self.model, self.tokenizer, prompt)
|
||||
|
||||
|
||||
model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
|
||||
worker = ModelWorker(
|
||||
model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
|
||||
)
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
@ -157,11 +162,4 @@ def embeddings(prompt_request: EmbeddingRequest):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
|
||||
print(model_path, DEVICE)
|
||||
|
||||
worker = ModelWorker(
|
||||
model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
|
||||
)
|
||||
|
||||
uvicorn.run(app, host="0.0.0.0", port=CFG.MODEL_PORT, log_level="info")
|
||||
|
@ -658,7 +658,7 @@ def signal_handler(sig, frame):
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--host", type=str, default="0.0.0.0")
|
||||
parser.add_argument("--port", type=int)
|
||||
parser.add_argument("--port", type=int, default=CFG.WEB_SERVER_PORT)
|
||||
parser.add_argument("--concurrency-count", type=int, default=10)
|
||||
parser.add_argument(
|
||||
"--model-list-mode", type=str, default="once", choices=["once", "reload"]
|
||||
|
Loading…
Reference in New Issue
Block a user