Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-08-23 18:43:19 +00:00)
Feature xuyuan openai proxy (#197)

1. Add an OpenAI proxy so ChatGPT models can be used, provided you supply an OpenAI secret key. Configure it in the .env file; you can also point it at your own ChatGPT proxy server. Note that you must first set LLM_MODEL=proxyllm (a connectivity-check sketch follows this list):
> PROXY_API_KEY=sk-xxx
> PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
> LLM_MODEL=proxyllm
2. Support multi-process startup of the llmserver service:
> gunicorn llmserver:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000 &
3. Allow the webserver port to be configured freely; the default is 7860:
> WEB_SERVER_PORT=7860
4. Add a .plugin_env configuration file for defining plugin variables.
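As a quick sanity check for point 1, here is a minimal sketch (not part of this commit) that reads PROXY_API_KEY and PROXY_SERVER_URL from the environment and sends one non-streaming chat-completions request. The script name, the model name, and the bearer-token header are assumptions based on the standard OpenAI API; the payload shape mirrors the proxyllm changes further down.

```python
# check_proxy.py -- hypothetical helper, not part of this commit.
# Sends a single chat-completions request to PROXY_SERVER_URL using PROXY_API_KEY.
import os
import requests

api_key = os.getenv("PROXY_API_KEY")
server_url = os.getenv("PROXY_SERVER_URL", "https://api.openai.com/v1/chat/completions")

headers = {"Authorization": f"Bearer {api_key}"}  # assumption: standard OpenAI auth header
payload = {
    "model": "gpt-3.5-turbo",  # assumption: any chat model your proxy accepts
    "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
    "temperature": 0.7,
    "max_tokens": 32,
    "stream": False,  # non-streaming keeps the check simple
}

res = requests.post(server_url, headers=headers, json=payload, timeout=30)
res.raise_for_status()
print(res.json()["choices"][0]["message"]["content"])
```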
This commit is contained in:
commit
3a446d404b
@ -7,6 +7,10 @@
## For example, to disable coding related features, uncomment the next line
# DISABLED_COMMAND_CATEGORIES=

#*******************************************************************#
#** Webserver Port **#
#*******************************************************************#
WEB_SERVER_PORT=7860

#*******************************************************************#
#*** LLM PROVIDER ***#
@ -17,6 +21,7 @@
#*******************************************************************#
#** LLM MODELS **#
#*******************************************************************#
# LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
LLM_MODEL=vicuna-13b
MODEL_SERVER=http://127.0.0.1:8000
LIMIT_MODEL_CONCURRENCY=5
@ -98,15 +103,20 @@ VECTOR_STORE_TYPE=Chroma
#MILVUS_SECURE=


#*******************************************************************#
#** WebServer Language Support **#
#*******************************************************************#
LANGUAGE=en
#LANGUAGE=zh


#*******************************************************************#
# ** PROXY_SERVER
# ** PROXY_SERVER (openai interface | chatGPT proxy service), use chatGPT as your LLM.
# ** if your server can visit openai, please set PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
# ** else if you have a chatgpt proxy server, you can set PROXY_SERVER_URL={your-proxy-serverip:port/xxx}
#*******************************************************************#
PROXY_API_KEY=
PROXY_SERVER_URL=http://127.0.0.1:3000/proxy_address
PROXY_API_KEY={your-openai-sk}
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions


#*******************************************************************#
1	.gitignore	vendored
@ -145,3 +145,4 @@ pilot/nltk_data

logswebserver.log.*
.history/*
.plugin_env
14	.plugin_env.template	Normal file
@ -0,0 +1,14 @@
####################################################################################
## [DB-GPT-Bytebase-Plugin] ###
#####################################################################################
HOST_NAME={your-host-ip, to execute command operate}
HOST_USER=root
HOST_PASSWORD={your-host-password}
SSH_PORT=22

BYTE_BASE_COOKIE={your-bytebase-cookie}
BYTE_BASE_DOMAIN={your-bytebase-server-address}
BYTE_BASE_DEFAULT_DEV_INSTANCE=mysql_dev
BYTE_BASE_DEFAULT_TEST_INSTANCE=mysql_test
BYTE_BASE_DEFAULT_PROD_INSTANCE=mysql_prod
DEFAULT_PROJECT_NAME={your-default-project}
@ -10,5 +10,6 @@ if "pytest" in sys.argv or "pytest" in sys.modules or os.getenv("CI"):

# Load the users .env file into environment variables
load_dotenv(verbose=True, override=True)
load_dotenv(".plugin_env")

del load_dotenv
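Once a .plugin_env file (copied from .plugin_env.template) sits in the working directory, the extra load_dotenv(".plugin_env") call above makes the plugin variables visible through os.getenv. A minimal sketch, assuming python-dotenv is installed; the printed variables are just examples from the template:

```python
# Minimal sketch: load plugin settings from .plugin_env and read a few of them.
# Assumes a .plugin_env created from .plugin_env.template in the current directory.
import os
from dotenv import load_dotenv

load_dotenv(verbose=True, override=True)   # regular .env, as in the snippet above
load_dotenv(".plugin_env")                 # plugin-specific variables

bytebase_domain = os.getenv("BYTE_BASE_DOMAIN")
dev_instance = os.getenv("BYTE_BASE_DEFAULT_DEV_INSTANCE", "mysql_dev")
print(f"Bytebase at {bytebase_domain}, dev instance {dev_instance}")
```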
@ -17,8 +17,9 @@ class Config(metaclass=Singleton):
    def __init__(self) -> None:
        """Initialize the Config class"""

        # Gradio language version: en, cn
        # Gradio language version: en, zh
        self.LANGUAGE = os.getenv("LANGUAGE", "en")
        self.WEB_SERVER_PORT = int(os.getenv("WEB_SERVER_PORT", 7860))

        self.debug_mode = False
        self.skip_reprompt = False
@ -66,6 +66,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
        "messages": history,
        "temperature": params.get("temperature"),
        "max_tokens": params.get("max_new_tokens"),
        "stream": True,
    }

    res = requests.post(

@ -75,14 +76,11 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
    text = ""
    for line in res.iter_lines():
        if line:
            decoded_line = line.decode("utf-8")
            try:
                json_line = json.loads(decoded_line)
                print(json_line)
                text += json_line["choices"][0]["message"]["content"]
            json_data = line.split(b": ", 1)[1]
            decoded_line = json_data.decode("utf-8")
            if decoded_line.lower() != "[DONE]".lower():
                obj = json.loads(json_data)
                if obj["choices"][0]["delta"].get("content") is not None:
                    content = obj["choices"][0]["delta"]["content"]
                    text += content
            yield text
            except Exception as e:
                text += decoded_line
                yield json.loads(text)["choices"][0]["message"]["content"]
@ -84,6 +84,11 @@ class ModelWorker:
        return get_embeddings(self.model, self.tokenizer, prompt)


model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
worker = ModelWorker(
    model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
)

app = FastAPI()


@ -157,11 +162,4 @@ def embeddings(prompt_request: EmbeddingRequest):


if __name__ == "__main__":
    model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
    print(model_path, DEVICE)

    worker = ModelWorker(
        model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
    )

    uvicorn.run(app, host="0.0.0.0", port=CFG.MODEL_PORT, log_level="info")
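As I read the two hunks above, creating model_path, worker, and app at module import time (instead of inside the `if __name__ == "__main__":` block) is what lets the gunicorn command from the commit message import llmserver:app in each worker process. A stripped-down sketch of the same pattern, with a hypothetical module name and endpoint:

```python
# myserver.py -- illustrative module, not the actual llmserver.py.
# Because `app` exists at import time, it can be served either way:
#   uvicorn myserver:app --port 8000
#   gunicorn myserver:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000
from fastapi import FastAPI
import uvicorn

app = FastAPI()   # module-level, so gunicorn workers can import it

@app.get("/health")
def health():
    # placeholder endpoint; the real service exposes generation/embedding routes
    return {"status": "ok"}

if __name__ == "__main__":
    # direct invocation still works for single-process development
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
```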
@ -658,7 +658,7 @@ def signal_handler(sig, frame):
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int)
    parser.add_argument("--port", type=int, default=CFG.WEB_SERVER_PORT)
    parser.add_argument("--concurrency-count", type=int, default=10)
    parser.add_argument(
        "--model-list-mode", type=str, default="once", choices=["once", "reload"]