diff --git a/.env.template b/.env.template
index 2fb5ff649..88d780ef8 100644
--- a/.env.template
+++ b/.env.template
@@ -7,6 +7,10 @@
 ## For example, to disable coding related features, uncomment the next line
 # DISABLED_COMMAND_CATEGORIES=
 
+#*******************************************************************#
+#** Webserver Port **#
+#*******************************************************************#
+WEB_SERVER_PORT=7860
 
 #*******************************************************************#
 #*** LLM PROVIDER ***#
@@ -17,6 +21,7 @@
 #*******************************************************************#
 #** LLM MODELS **#
 #*******************************************************************#
+# LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
 LLM_MODEL=vicuna-13b
 MODEL_SERVER=http://127.0.0.1:8000
 LIMIT_MODEL_CONCURRENCY=5
@@ -98,15 +103,20 @@ VECTOR_STORE_TYPE=Chroma
 #MILVUS_SECURE=
 
 
+#*******************************************************************#
+#** WebServer Language Support **#
+#*******************************************************************#
 LANGUAGE=en
 #LANGUAGE=zh
 
 
 #*******************************************************************#
-# ** PROXY_SERVER
+# ** PROXY_SERVER (OpenAI interface | ChatGPT proxy service): use ChatGPT as your LLM.
+# ** If your server can reach OpenAI, set PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
+# ** Otherwise, if you have a ChatGPT proxy server, set PROXY_SERVER_URL={your-proxy-server-ip:port/xxx}
 #*******************************************************************#
-PROXY_API_KEY=
-PROXY_SERVER_URL=http://127.0.0.1:3000/proxy_address
+PROXY_API_KEY={your-openai-sk}
+PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
 
 
 #*******************************************************************#
diff --git a/pilot/configs/config.py b/pilot/configs/config.py
index 971be9170..01f5a1898 100644
--- a/pilot/configs/config.py
+++ b/pilot/configs/config.py
@@ -17,8 +17,9 @@ class Config(metaclass=Singleton):
 
     def __init__(self) -> None:
         """Initialize the Config class"""
-        # Gradio language version: en, cn
+        # Gradio language version: en, zh
         self.LANGUAGE = os.getenv("LANGUAGE", "en")
+        self.WEB_SERVER_PORT = int(os.getenv("WEB_SERVER_PORT", 7860))
 
         self.debug_mode = False
         self.skip_reprompt = False
diff --git a/pilot/server/llmserver.py b/pilot/server/llmserver.py
index d2730e0d5..a1dba135b 100644
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -84,6 +84,11 @@ class ModelWorker:
         return get_embeddings(self.model, self.tokenizer, prompt)
 
 
+model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
+worker = ModelWorker(
+    model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
+)
+
 app = FastAPI()
 
 
@@ -157,11 +162,4 @@ def embeddings(prompt_request: EmbeddingRequest):
 
 
 if __name__ == "__main__":
-    model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
-    print(model_path, DEVICE)
-
-    worker = ModelWorker(
-        model_path=model_path, model_name=CFG.LLM_MODEL, device=DEVICE, num_gpus=1
-    )
-
     uvicorn.run(app, host="0.0.0.0", port=CFG.MODEL_PORT, log_level="info")
diff --git a/pilot/server/webserver.py b/pilot/server/webserver.py
index 270f3b681..81270ff79 100644
--- a/pilot/server/webserver.py
+++ b/pilot/server/webserver.py
@@ -658,7 +658,7 @@ def signal_handler(sig, frame):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="0.0.0.0")
-    parser.add_argument("--port", type=int)
+    parser.add_argument("--port", type=int, default=CFG.WEB_SERVER_PORT)
parser.add_argument("--concurrency-count", type=int, default=10) parser.add_argument( "--model-list-mode", type=str, default="once", choices=["once", "reload"]