Feature xuyuan openai proxy (#197)

1. Add an OpenAI proxy. The ChatGPT model is supported only if you provide an
OpenAI secret key; configure it in the .env file. You can also point
PROXY_SERVER_URL at your own ChatGPT proxy server instead. Note that you must
first set LLM_MODEL=proxyllm.
> PROXY_API_KEY=sk-xxx
> PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
> LLM_MODEL=proxyllm
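
As a rough illustration of how these three settings fit together, the sketch
below sends one chat request through the configured endpoint (the model name
gpt-3.5-turbo and the prompt are placeholders, not values taken from this
commit):
> # sketch only: assumes PROXY_API_KEY / PROXY_SERVER_URL are set as above
> import os, requests
> resp = requests.post(
>     os.getenv("PROXY_SERVER_URL"),
>     headers={"Authorization": f"Bearer {os.getenv('PROXY_API_KEY')}"},
>     json={"model": "gpt-3.5-turbo",  # placeholder model name
>           "messages": [{"role": "user", "content": "hello"}]},
> )
> print(resp.json()["choices"][0]["message"]["content"])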

2. Support starting the llmserver service with multiple worker processes:
> gunicorn llmserver:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000 &
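
The same multi-worker setup can also be kept in a gunicorn config file instead
of command-line flags (a sketch; the module path llmserver:app and the port are
taken from the command above):
> # gunicorn.conf.py (sketch) -- run: gunicorn -c gunicorn.conf.py llmserver:app
> bind = "0.0.0.0:8000"
> workers = 4
> worker_class = "uvicorn.workers.UvicornWorker"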

3. Support configuring the port of the webserver service; the default is 7860.
> WEB_SERVER_PORT=7860
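
The value is read from the environment and used as the argparse default, so
--port on the command line still overrides it (see the config and webserver
diffs below). Roughly:
> import argparse, os
> # mirrors the change below: WEB_SERVER_PORT from the environment, falling back to 7860
> port = int(os.getenv("WEB_SERVER_PORT", 7860))
> parser = argparse.ArgumentParser()
> parser.add_argument("--port", type=int, default=port)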

4. Add a configuration file, .plugin_env, to define plugin variables.
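
The new file is loaded next to the regular .env at startup (see the load_dotenv
diff below), so plugin code can read its variables with os.getenv. A minimal
sketch, using names from .plugin_env.template:
> import os
> from dotenv import load_dotenv
> load_dotenv(".plugin_env")            # same call the commit adds at startup
> host = os.getenv("HOST_NAME")         # e.g. the Bytebase plugin's target host
> cookie = os.getenv("BYTE_BASE_COOKIE")
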
Shinexy 2023-06-13 21:24:50 +08:00 committed by GitHub
commit 3a446d404b
8 changed files with 46 additions and 23 deletions


@@ -7,6 +7,10 @@
## For example, to disable coding related features, uncomment the next line
# DISABLED_COMMAND_CATEGORIES=
#*******************************************************************#
#** Webserver Port **#
#*******************************************************************#
WEB_SERVER_PORT=7860
#*******************************************************************#
#*** LLM PROVIDER ***#
@@ -17,6 +21,7 @@
#*******************************************************************#
#** LLM MODELS **#
#*******************************************************************#
# LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
LLM_MODEL=vicuna-13b
MODEL_SERVER=http://127.0.0.1:8000
LIMIT_MODEL_CONCURRENCY=5
@@ -98,15 +103,20 @@ VECTOR_STORE_TYPE=Chroma
#MILVUS_SECURE=
#*******************************************************************#
#** WebServer Language Support **#
#*******************************************************************#
LANGUAGE=en
#LANGUAGE=zh
#*******************************************************************#
# ** PROXY_SERVER
# ** PROXY_SERVER (openai interface | chatGPT proxy service), use chatGPT as your LLM.
# ** if your server can visit openai, please set PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
# ** else if you have a chatgpt proxy server, you can set PROXY_SERVER_URL={your-proxy-serverip:port/xxx}
#*******************************************************************#
PROXY_API_KEY=
PROXY_SERVER_URL=http://127.0.0.1:3000/proxy_address
PROXY_API_KEY={your-openai-sk}
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
#*******************************************************************#

.gitignore

@@ -145,3 +145,4 @@ pilot/nltk_data
logs
webserver.log.*
.history/*
.plugin_env

.plugin_env.template (new file)

@@ -0,0 +1,14 @@
####################################################################################
## [DB-GPT-Bytebase-Plugin] ###
#####################################################################################
HOST_NAME={your-host-ip, to execute command operate}
HOST_USER=root
HOST_PASSWORD={your-host-password}
SSH_PORT=22
BYTE_BASE_COOKIE={your-bytebase-cookie}
BYTE_BASE_DOMAIN={your-bytebase-server-address}
BYTE_BASE_DEFAULT_DEV_INSTANCE=mysql_dev
BYTE_BASE_DEFAULT_TEST_INSTANCE=mysql_test
BYTE_BASE_DEFAULT_PROD_INSTANCE=mysql_prod
DEFAULT_PROJECT_NAME={your-default-project}


@@ -10,5 +10,6 @@ if "pytest" in sys.argv or "pytest" in sys.modules or os.getenv("CI"):
# Load the users .env file into environment variables
load_dotenv(verbose=True, override=True)
load_dotenv(".plugin_env")
del load_dotenv


@@ -17,8 +17,9 @@ class Config(metaclass=Singleton):
def __init__(self) -> None:
"""Initialize the Config class"""
# Gradio language version: en, cn
# Gradio language version: en, zh
self.LANGUAGE = os.getenv("LANGUAGE", "en")
self.WEB_SERVER_PORT = int(os.getenv("WEB_SERVER_PORT", 7860))
self.debug_mode = False
self.skip_reprompt = False


@@ -66,6 +66,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
"messages": history,
"temperature": params.get("temperature"),
"max_tokens": params.get("max_new_tokens"),
"stream": True,
}
res = requests.post(
@@ -75,14 +76,11 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
text = ""
for line in res.iter_lines():
if line:
decoded_line = line.decode("utf-8")
try:
json_line = json.loads(decoded_line)
print(json_line)
text += json_line["choices"][0]["message"]["content"]
yield text
except Exception as e:
text += decoded_line
yield json.loads(text)["choices"][0]["message"]["content"]
json_data = line.split(b": ", 1)[1]
decoded_line = json_data.decode("utf-8")
if decoded_line.lower() != "[DONE]".lower():
obj = json.loads(json_data)
if obj["choices"][0]["delta"].get("content") is not None:
content = obj["choices"][0]["delta"]["content"]
text += content
yield text
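
For context on the parsing change above: with "stream": True the endpoint
returns OpenAI-style server-sent events, one "data: "-prefixed JSON chunk per
line with the incremental text under choices[0].delta.content, terminated by a
[DONE] marker. Roughly:
> data: {"choices": [{"delta": {"content": "Hel"}, "index": 0}]}
> data: {"choices": [{"delta": {"content": "lo"}, "index": 0}]}
> data: [DONE]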


@@ -84,6 +84,11 @@ class ModelWorker:
return get_embeddings(self.model, self.tokenizer, prompt)
model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
worker = ModelWorker(
model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
)
app = FastAPI()
@@ -157,11 +162,4 @@ def embeddings(prompt_request: EmbeddingRequest):
if __name__ == "__main__":
model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
print(model_path, DEVICE)
worker = ModelWorker(
model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
)
uvicorn.run(app, host="0.0.0.0", port=CFG.MODEL_PORT, log_level="info")


@@ -658,7 +658,7 @@ def signal_handler(sig, frame):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="0.0.0.0")
parser.add_argument("--port", type=int)
parser.add_argument("--port", type=int, default=CFG.WEB_SERVER_PORT)
parser.add_argument("--concurrency-count", type=int, default=10)
parser.add_argument(
"--model-list-mode", type=str, default="once", choices=["once", "reload"]