From 62eb6c383e7750205ec59e31e48750eb26df720d Mon Sep 17 00:00:00 2001
From: xuyuan23 <643854343@qq.com>
Date: Mon, 12 Jun 2023 17:29:35 +0800
Subject: [PATCH 1/5] add OpenAI proxy; the ChatGPT model is supported only if you provide an OpenAI secret key.

---
 pilot/model/llm_out/proxy_llm.py | 33 ++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/pilot/model/llm_out/proxy_llm.py b/pilot/model/llm_out/proxy_llm.py
index 68512ec3c..6dd1bfc2b 100644
--- a/pilot/model/llm_out/proxy_llm.py
+++ b/pilot/model/llm_out/proxy_llm.py
@@ -66,6 +66,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
         "messages": history,
         "temperature": params.get("temperature"),
         "max_tokens": params.get("max_new_tokens"),
+        "stream": True
     }
 
     res = requests.post(
@@ -75,8 +76,32 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
     text = ""
     for line in res.iter_lines():
         if line:
-            decoded_line = line.decode("utf-8")
-            json_line = json.loads(decoded_line)
-            print(json_line)
-            text += json_line["choices"][0]["message"]["content"]
+            json_data = line.split(b': ', 1)[1]
+            decoded_line = json_data.decode("utf-8")
+            if decoded_line.lower() != '[DONE]'.lower():
+                obj = json.loads(json_data)
+                if obj['choices'][0]['delta'].get('content') is not None:
+                    content = obj['choices'][0]['delta']['content']
+                    text += content
             yield text
+
+    # native result.
+    # payloads = {
+    #     "model": "gpt-3.5-turbo", # just for test, remove this later
+    #     "messages": history,
+    #     "temperature": params.get("temperature"),
+    #     "max_tokens": params.get("max_new_tokens"),
+    # }
+    #
+    # res = requests.post(
+    #     CFG.proxy_server_url, headers=headers, json=payloads, stream=True
+    # )
+    #
+    # text = ""
+    # line = res.content
+    # if line:
+    #     decoded_line = line.decode("utf-8")
+    #     json_line = json.loads(decoded_line)
+    #     print(json_line)
+    #     text += json_line["choices"][0]["message"]["content"]
+    # yield text
\ No newline at end of file

From 6e3b48c7c427c7cbaff24995de9e0c6192528f43 Mon Sep 17 00:00:00 2001
From: xuyuan23 <643854343@qq.com>
Date: Mon, 12 Jun 2023 21:29:16 +0800
Subject: [PATCH 2/5] support launching llmserver with multiple processes; add the OpenAI proxy API.
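
The ModelWorker in pilot/server/llmserver.py is now created at module import time, so the
FastAPI app no longer depends on the __main__ block and can also be served by multi-process
runners. The new PROXY_API_KEY / PROXY_SERVER_URL settings point the proxy LLM at an
OpenAI-compatible chat completions endpoint. A minimal standalone sketch of the streaming
call these settings drive is shown below; the helper name and the header construction are
illustrative assumptions, not code taken from this series:

    import json
    import os

    import requests

    def stream_chat(messages, temperature=0.7, max_tokens=1024):
        """Yield the accumulated answer text as deltas arrive from the endpoint."""
        headers = {
            "Content-Type": "application/json",
            # OpenAI-style bearer auth built from the new .env setting (assumed format)
            "Authorization": f"Bearer {os.getenv('PROXY_API_KEY')}",
        }
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": True,
        }
        res = requests.post(
            os.getenv("PROXY_SERVER_URL", "https://api.openai.com/v1/chat/completions"),
            headers=headers,
            json=payload,
            stream=True,
        )
        text = ""
        for line in res.iter_lines():
            if not line:
                continue
            data = line.split(b": ", 1)[1]  # drop the SSE "data: " prefix
            if data.decode("utf-8").strip() == "[DONE]":
                break  # end-of-stream sentinel sent by the API
            delta = json.loads(data)["choices"][0]["delta"]
            if delta.get("content"):
                text += delta["content"]
                yield text

    # usage sketch:
    # for partial in stream_chat([{"role": "user", "content": "Hello"}]):
    #     print(partial)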
---
 .env.template             | 16 +++++++++++++---
 pilot/configs/config.py   |  3 ++-
 pilot/server/llmserver.py | 12 +++++-------
 pilot/server/webserver.py |  2 +-
 4 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/.env.template b/.env.template
index 2fb5ff649..88d780ef8 100644
--- a/.env.template
+++ b/.env.template
@@ -7,6 +7,10 @@
 ## For example, to disable coding related features, uncomment the next line
 # DISABLED_COMMAND_CATEGORIES=
 
+#*******************************************************************#
+#**                        Webserver Port                          **#
+#*******************************************************************#
+WEB_SERVER_PORT=7860
 
 #*******************************************************************#
 #***                       LLM PROVIDER                           ***#
@@ -17,6 +21,7 @@
 #*******************************************************************#
 #**                         LLM MODELS                             **#
 #*******************************************************************#
+# LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
 LLM_MODEL=vicuna-13b
 MODEL_SERVER=http://127.0.0.1:8000
 LIMIT_MODEL_CONCURRENCY=5
@@ -98,15 +103,20 @@ VECTOR_STORE_TYPE=Chroma
 #MILVUS_SECURE=
 
 
+#*******************************************************************#
+#**                  WebServer Language Support                    **#
+#*******************************************************************#
 LANGUAGE=en
 #LANGUAGE=zh
 
 
 #*******************************************************************#
-# ** PROXY_SERVER
+# ** PROXY_SERVER (OpenAI interface | ChatGPT proxy service): use ChatGPT as your LLM.
+# ** If your server can reach the OpenAI API, set PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
+# ** Otherwise, if you have a ChatGPT proxy server, set PROXY_SERVER_URL={your-proxy-serverip:port/xxx}
 #*******************************************************************#
-PROXY_API_KEY=
-PROXY_SERVER_URL=http://127.0.0.1:3000/proxy_address
+PROXY_API_KEY={your-openai-sk}
+PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
 
 
 #*******************************************************************#
diff --git a/pilot/configs/config.py b/pilot/configs/config.py
index 971be9170..01f5a1898 100644
--- a/pilot/configs/config.py
+++ b/pilot/configs/config.py
@@ -17,8 +17,9 @@ class Config(metaclass=Singleton):
 
     def __init__(self) -> None:
         """Initialize the Config class"""
-        # Gradio language version: en, cn
+        # Gradio language version: en, zh
        self.LANGUAGE = os.getenv("LANGUAGE", "en")
+        self.WEB_SERVER_PORT = int(os.getenv("WEB_SERVER_PORT", 7860))
 
         self.debug_mode = False
         self.skip_reprompt = False
diff --git a/pilot/server/llmserver.py b/pilot/server/llmserver.py
index d2730e0d5..a1dba135b 100644
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -84,6 +84,11 @@ class ModelWorker:
         return get_embeddings(self.model, self.tokenizer, prompt)
 
 
+model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
+worker = ModelWorker(
+    model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
+)
+
 app = FastAPI()
 
 
@@ -157,11 +162,4 @@ def embeddings(prompt_request: EmbeddingRequest):
 
 
 if __name__ == "__main__":
-    model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
-    print(model_path, DEVICE)
-
-    worker = ModelWorker(
-        model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
-    )
-
     uvicorn.run(app, host="0.0.0.0", port=CFG.MODEL_PORT, log_level="info")
diff --git a/pilot/server/webserver.py b/pilot/server/webserver.py
index 270f3b681..81270ff79 100644
--- a/pilot/server/webserver.py
+++ b/pilot/server/webserver.py
@@ -658,7 +658,7 @@ def signal_handler(sig, frame):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="0.0.0.0")
-    parser.add_argument("--port", type=int)
+    parser.add_argument("--port", type=int, default=CFG.WEB_SERVER_PORT)
     parser.add_argument("--concurrency-count", type=int, default=10)
     parser.add_argument(
         "--model-list-mode", type=str, default="once", choices=["once", "reload"]

From 6a8ee918344fd959595b442766522bc80a40faae Mon Sep 17 00:00:00 2001
From: xuyuan23 <643854343@qq.com>
Date: Tue, 13 Jun 2023 15:58:24 +0800
Subject: [PATCH 3/5] add plugin_env file and define the plugin config strategy.

---
 .gitignore                |  1 +
 .plugin_env.template      | 14 ++++++++++++++
 pilot/configs/__init__.py |  1 +
 3 files changed, 16 insertions(+)
 create mode 100644 .plugin_env.template

diff --git a/.gitignore b/.gitignore
index 82fa7fe62..faebd410a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -145,3 +145,4 @@ pilot/nltk_data
 logs
 webserver.log.*
 .history/*
+.plugin_env
\ No newline at end of file
diff --git a/.plugin_env.template b/.plugin_env.template
new file mode 100644
index 000000000..92d73a1bf
--- /dev/null
+++ b/.plugin_env.template
@@ -0,0 +1,14 @@
+####################################################################################
+##                           [DB-GPT-Bytebase-Plugin]                            ##
+####################################################################################
+HOST_NAME={your-host-ip, the host on which commands are executed}
+HOST_USER=root
+HOST_PASSWORD={your-host-password}
+SSH_PORT=22
+
+BYTE_BASE_COOKIE={your-bytebase-cookie}
+BYTE_BASE_DOMAIN={your-bytebase-server-address}
+BYTE_BASE_DEFAULT_DEV_INSTANCE=mysql_dev
+BYTE_BASE_DEFAULT_TEST_INSTANCE=mysql_test
+BYTE_BASE_DEFAULT_PROD_INSTANCE=mysql_prod
+DEFAULT_PROJECT_NAME={your-default-project}
diff --git a/pilot/configs/__init__.py b/pilot/configs/__init__.py
index 909f8bf4b..44f901bbd 100644
--- a/pilot/configs/__init__.py
+++ b/pilot/configs/__init__.py
@@ -10,5 +10,6 @@ if "pytest" in sys.argv or "pytest" in sys.modules or os.getenv("CI"):
 
 # Load the users .env file into environment variables
 load_dotenv(verbose=True, override=True)
+load_dotenv(".plugin_env")
 
 del load_dotenv

From 24457dc286b60c3a4ad4b854bc8726ab1bcdef5a Mon Sep 17 00:00:00 2001
From: xuyuan23 <643854343@qq.com>
Date: Tue, 13 Jun 2023 17:42:11 +0800
Subject: [PATCH 4/5] reformat proxy_llm.py

---
 pilot/model/llm_out/proxy_llm.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pilot/model/llm_out/proxy_llm.py b/pilot/model/llm_out/proxy_llm.py
index 6dd1bfc2b..e0ec78dd9 100644
--- a/pilot/model/llm_out/proxy_llm.py
+++ b/pilot/model/llm_out/proxy_llm.py
@@ -66,7 +66,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
         "messages": history,
         "temperature": params.get("temperature"),
         "max_tokens": params.get("max_new_tokens"),
-        "stream": True
+        "stream": True,
     }
 
     res = requests.post(
@@ -76,12 +76,12 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
     text = ""
     for line in res.iter_lines():
         if line:
-            json_data = line.split(b': ', 1)[1]
+            json_data = line.split(b": ", 1)[1]
             decoded_line = json_data.decode("utf-8")
-            if decoded_line.lower() != '[DONE]'.lower():
+            if decoded_line.lower() != "[DONE]".lower():
                 obj = json.loads(json_data)
-                if obj['choices'][0]['delta'].get('content') is not None:
-                    content = obj['choices'][0]['delta']['content']
+                if obj["choices"][0]["delta"].get("content") is not None:
+                    content = obj["choices"][0]["delta"]["content"]
                     text += content
             yield text
 
@@ -104,4 +104,4 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
     #     json_line = json.loads(decoded_line)
     #     print(json_line)
     #     text += json_line["choices"][0]["message"]["content"]
-    # yield text
\ No newline at end of file
+    # yield text

From acc2c5806c01ca8626bd3ec636f6d097b14ed730 Mon Sep 17 00:00:00 2001
From: xuyuan23 <643854343@qq.com>
Date: Tue, 13 Jun 2023 17:45:49 +0800
Subject: [PATCH 5/5] remove commented-out code

---
 pilot/model/llm_out/proxy_llm.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/pilot/model/llm_out/proxy_llm.py b/pilot/model/llm_out/proxy_llm.py
index e0ec78dd9..717311778 100644
--- a/pilot/model/llm_out/proxy_llm.py
+++ b/pilot/model/llm_out/proxy_llm.py
@@ -84,24 +84,3 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
                     content = obj["choices"][0]["delta"]["content"]
                     text += content
             yield text
-
-    # native result.
-    # payloads = {
-    #     "model": "gpt-3.5-turbo", # just for test, remove this later
-    #     "messages": history,
-    #     "temperature": params.get("temperature"),
-    #     "max_tokens": params.get("max_new_tokens"),
-    # }
-    #
-    # res = requests.post(
-    #     CFG.proxy_server_url, headers=headers, json=payloads, stream=True
-    # )
-    #
-    # text = ""
-    # line = res.content
-    # if line:
-    #     decoded_line = line.decode("utf-8")
-    #     json_line = json.loads(decoded_line)
-    #     print(json_line)
-    #     text += json_line["choices"][0]["message"]["content"]
-    # yield text
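
After this series, proxyllm_generate_stream is a plain streaming generator: each yield
returns the full text accumulated so far rather than only the newest delta. A minimal
caller sketch follows; how the conversation ends up in params is not visible in these
hunks, so the "prompt" key, and passing None for the model/tokenizer arguments (which the
proxy path shown here does not appear to use), are assumptions for illustration only:

    from pilot.model.llm_out.proxy_llm import proxyllm_generate_stream

    params = {
        "prompt": "Hello, who are you?",  # assumed key; not shown in this series
        "temperature": 0.7,
        "max_new_tokens": 256,
    }
    for partial_text in proxyllm_generate_stream(None, None, params, device="cpu"):
        print(partial_text)  # prints progressively longer snapshots of the answer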