diff --git a/pilot/configs/config.py b/pilot/configs/config.py
index 9023bc061..b914390f7 100644
--- a/pilot/configs/config.py
+++ b/pilot/configs/config.py
@@ -105,7 +105,9 @@ class Config(metaclass=Singleton):
         self.LLM_MODEL = os.getenv("LLM_MODEL", "vicuna-13b")
         self.LIMIT_MODEL_CONCURRENCY = int(os.getenv("LIMIT_MODEL_CONCURRENCY", 5))
         self.MAX_POSITION_EMBEDDINGS = int(os.getenv("MAX_POSITION_EMBEDDINGS", 4096))
-        self.MODEL_SERVER = os.getenv("MODEL_SERVER", "http://121.41.167.183:8000")
+        # Model server port; int() so an env override is not left as a str.
+        self.MODEL_PORT = int(os.getenv("MODEL_PORT", 8000))
+        self.MODEL_SERVER = os.getenv("MODEL_SERVER", f"http://127.0.0.1:{self.MODEL_PORT}")
         self.ISLOAD_8BIT = os.getenv("ISLOAD_8BIT", "True") == "True"
 
     def set_debug_mode(self, value: bool) -> None:
diff --git a/pilot/server/llmserver.py b/pilot/server/llmserver.py
index e341cc457..e1c7556f6 100644
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -130,4 +130,4 @@ def embeddings(prompt_request: EmbeddingRequest):
 
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", log_level="info")
\ No newline at end of file
+    uvicorn.run(app, host="0.0.0.0", port=CFG.MODEL_PORT, log_level="info")