Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-08-23 18:43:19 +00:00)
Feature xuyuan openai proxy (#197)

1. Add an OpenAI proxy so ChatGPT models can be used, provided you supply an OpenAI secret key. Configure it in the .env file; you can also point it at your own ChatGPT proxy server. Note that you must first set LLM_MODEL=proxyllm (a connectivity-check sketch follows this list):
> PROXY_API_KEY=sk-xxx
> PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
> LLM_MODEL=proxyllm
2. Support multi-process startup of the llmserver service:
> gunicorn llmserver:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000 &
3. Allow the webserver port to be configured freely; the default is 7860:
> WEB_SERVER_PORT=7860
4. Add a .plugin_env configuration file for defining plugin variables.
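As a quick sanity check for point 1, here is a minimal sketch (not part of this commit) that reads PROXY_API_KEY and PROXY_SERVER_URL from the environment and sends one non-streaming chat-completions request. The script name, the model name, and the bearer-token header are assumptions based on the standard OpenAI API; the payload shape mirrors the proxyllm changes further down.

```python
# check_proxy.py -- hypothetical helper, not part of this commit.
# Sends a single chat-completions request to PROXY_SERVER_URL using PROXY_API_KEY.
import os
import requests

api_key = os.getenv("PROXY_API_KEY")
server_url = os.getenv("PROXY_SERVER_URL", "https://api.openai.com/v1/chat/completions")

headers = {"Authorization": f"Bearer {api_key}"}  # assumption: standard OpenAI auth header
payload = {
    "model": "gpt-3.5-turbo",  # assumption: any chat model your proxy accepts
    "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
    "temperature": 0.7,
    "max_tokens": 32,
    "stream": False,  # non-streaming keeps the check simple
}

res = requests.post(server_url, headers=headers, json=payload, timeout=30)
res.raise_for_status()
print(res.json()["choices"][0]["message"]["content"])
```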
This commit is contained in:
commit
3a446d404b
@ -7,6 +7,10 @@
## For example, to disable coding related features, uncomment the next line
# DISABLED_COMMAND_CATEGORIES=

#*******************************************************************#
#** Webserver Port **#
#*******************************************************************#
WEB_SERVER_PORT=7860

#*******************************************************************#
#*** LLM PROVIDER ***#
@ -17,6 +21,7 @@
#*******************************************************************#
#** LLM MODELS **#
#*******************************************************************#
# LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
LLM_MODEL=vicuna-13b
MODEL_SERVER=http://127.0.0.1:8000
LIMIT_MODEL_CONCURRENCY=5
@ -98,15 +103,20 @@ VECTOR_STORE_TYPE=Chroma
#MILVUS_SECURE=


#*******************************************************************#
#** WebServer Language Support **#
#*******************************************************************#
LANGUAGE=en
#LANGUAGE=zh


#*******************************************************************#
# ** PROXY_SERVER
# ** PROXY_SERVER (openai interface | chatGPT proxy service), use chatGPT as your LLM.
# ** if your server can visit openai, please set PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
# ** else if you have a chatgpt proxy server, you can set PROXY_SERVER_URL={your-proxy-serverip:port/xxx}
#*******************************************************************#
PROXY_API_KEY=
PROXY_SERVER_URL=http://127.0.0.1:3000/proxy_address
PROXY_API_KEY={your-openai-sk}
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions


#*******************************************************************#
1	.gitignore	vendored
@ -145,3 +145,4 @@ pilot/nltk_data

logswebserver.log.*
.history/*
.plugin_env
14	.plugin_env.template	Normal file
@ -0,0 +1,14 @@
####################################################################################
## [DB-GPT-Bytebase-Plugin] ###
#####################################################################################
HOST_NAME={your-host-ip, to execute command operate}
HOST_USER=root
HOST_PASSWORD={your-host-password}
SSH_PORT=22

BYTE_BASE_COOKIE={your-bytebase-cookie}
BYTE_BASE_DOMAIN={your-bytebase-server-address}
BYTE_BASE_DEFAULT_DEV_INSTANCE=mysql_dev
BYTE_BASE_DEFAULT_TEST_INSTANCE=mysql_test
BYTE_BASE_DEFAULT_PROD_INSTANCE=mysql_prod
DEFAULT_PROJECT_NAME={your-default-project}
@ -10,5 +10,6 @@ if "pytest" in sys.argv or "pytest" in sys.modules or os.getenv("CI"):

# Load the users .env file into environment variables
load_dotenv(verbose=True, override=True)
load_dotenv(".plugin_env")

del load_dotenv
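Once a .plugin_env file (copied from .plugin_env.template) sits in the working directory, the extra load_dotenv(".plugin_env") call above makes the plugin variables visible through os.getenv. A minimal sketch, assuming python-dotenv is installed; the printed variables are just examples from the template:

```python
# Minimal sketch: load plugin settings from .plugin_env and read a few of them.
# Assumes a .plugin_env created from .plugin_env.template in the current directory.
import os
from dotenv import load_dotenv

load_dotenv(verbose=True, override=True)   # regular .env, as in the snippet above
load_dotenv(".plugin_env")                 # plugin-specific variables

bytebase_domain = os.getenv("BYTE_BASE_DOMAIN")
dev_instance = os.getenv("BYTE_BASE_DEFAULT_DEV_INSTANCE", "mysql_dev")
print(f"Bytebase at {bytebase_domain}, dev instance {dev_instance}")
```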
@ -17,8 +17,9 @@ class Config(metaclass=Singleton):
    def __init__(self) -> None:
        """Initialize the Config class"""

        # Gradio language version: en, cn
        # Gradio language version: en, zh
        self.LANGUAGE = os.getenv("LANGUAGE", "en")
        self.WEB_SERVER_PORT = int(os.getenv("WEB_SERVER_PORT", 7860))

        self.debug_mode = False
        self.skip_reprompt = False
@ -66,6 +66,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
        "messages": history,
        "temperature": params.get("temperature"),
        "max_tokens": params.get("max_new_tokens"),
        "stream": True,
    }

    res = requests.post(

@ -75,14 +76,11 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
    text = ""
    for line in res.iter_lines():
        if line:
            decoded_line = line.decode("utf-8")
            try:
                json_line = json.loads(decoded_line)
                print(json_line)
                text += json_line["choices"][0]["message"]["content"]
            json_data = line.split(b": ", 1)[1]
            decoded_line = json_data.decode("utf-8")
            if decoded_line.lower() != "[DONE]".lower():
                obj = json.loads(json_data)
                if obj["choices"][0]["delta"].get("content") is not None:
                    content = obj["choices"][0]["delta"]["content"]
                    text += content
            yield text
            except Exception as e:
                text += decoded_line
                yield json.loads(text)["choices"][0]["message"]["content"]
@ -84,6 +84,11 @@ class ModelWorker:
        return get_embeddings(self.model, self.tokenizer, prompt)


model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
worker = ModelWorker(
    model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
)

app = FastAPI()


@ -157,11 +162,4 @@ def embeddings(prompt_request: EmbeddingRequest):


if __name__ == "__main__":
    model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
    print(model_path, DEVICE)

    worker = ModelWorker(
        model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
    )

    uvicorn.run(app, host="0.0.0.0", port=CFG.MODEL_PORT, log_level="info")
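As I read the two hunks above, creating model_path, worker, and app at module import time (instead of inside the `if __name__ == "__main__":` block) is what lets the gunicorn command from the commit message import llmserver:app in each worker process. A stripped-down sketch of the same pattern, with a hypothetical module name and endpoint:

```python
# myserver.py -- illustrative module, not the actual llmserver.py.
# Because `app` exists at import time, it can be served either way:
#   uvicorn myserver:app --port 8000
#   gunicorn myserver:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000
from fastapi import FastAPI
import uvicorn

app = FastAPI()   # module-level, so gunicorn workers can import it

@app.get("/health")
def health():
    # placeholder endpoint; the real service exposes generation/embedding routes
    return {"status": "ok"}

if __name__ == "__main__":
    # direct invocation still works for single-process development
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
```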
@ -658,7 +658,7 @@ def signal_handler(sig, frame):
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int)
    parser.add_argument("--port", type=int, default=CFG.WEB_SERVER_PORT)
    parser.add_argument("--concurrency-count", type=int, default=10)
    parser.add_argument(
        "--model-list-mode", type=str, default="once", choices=["once", "reload"]