mirror of https://github.com/csunny/DB-GPT.git
synced 2025-07-24 12:45:45 +00:00

feat(model): Support AquilaChat2-34B

This commit is contained in:
parent 52d6095b21
commit 95d3f5222b
@@ -23,6 +23,15 @@ WEB_SERVER_PORT=7860
#*******************************************************************#
# LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
LLM_MODEL=vicuna-13b-v1.5
## LLM model path, by default, DB-GPT will read the model path from LLM_MODEL_CONFIG based on the LLM_MODEL.
## Of course you can specify your model path according to LLM_MODEL_PATH
## In DB-GPT, the priority from high to low to read model path:
## 1. environment variable with key: {LLM_MODEL}_MODEL_PATH (Avoid multi-model conflicts)
## 2. environment variable with key: MODEL_PATH
## 3. environment variable with key: LLM_MODEL_PATH
## 4. the config in /pilot/configs/model_config.LLM_MODEL_CONFIG
# LLM_MODEL_PATH=/app/models/vicuna-13b-v1.5
# LLM_PROMPT_TEMPLATE=vicuna_v1.1
MODEL_SERVER=http://127.0.0.1:8000
LIMIT_MODEL_CONCURRENCY=5
MAX_POSITION_EMBEDDINGS=4096

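The comments above describe a four-level priority for resolving the model path. A minimal sketch of that lookup order, assuming a helper name and a stand-in `LLM_MODEL_CONFIG` dict (not the project's actual implementation, and the exact env-key normalization is an assumption):

```python
import os
from typing import Optional

# Hypothetical stand-in for pilot/configs/model_config.LLM_MODEL_CONFIG
LLM_MODEL_CONFIG = {"vicuna-13b-v1.5": "/app/models/vicuna-13b-v1.5"}


def resolve_model_path(llm_model: str) -> Optional[str]:
    """Sketch of the documented priority:
    {LLM_MODEL}_MODEL_PATH > MODEL_PATH > LLM_MODEL_PATH > LLM_MODEL_CONFIG."""
    # How DB-GPT derives the per-model env key is an assumption here.
    prefix = llm_model.replace("-", "_").replace(".", "_").upper()
    return (
        os.getenv(f"{prefix}_MODEL_PATH")
        or os.getenv("MODEL_PATH")
        or os.getenv("LLM_MODEL_PATH")
        or LLM_MODEL_CONFIG.get(llm_model)
    )


print(resolve_model_path("vicuna-13b-v1.5"))  # falls back to the config entry unless overridden
```
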
@@ -1,6 +1,6 @@
LLM USE FAQ
==================================
##### Q1:how to use openai chatgpt service
##### Q1: how to use openai chatgpt service
change your LLM_MODEL in `.env`
````shell
LLM_MODEL=proxyllm

@@ -15,7 +15,7 @@ PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions

make sure your openapi API_KEY is available

##### Q2 What difference between `python dbgpt_server --light` and `python dbgpt_server`
##### Q2: What difference between `python dbgpt_server --light` and `python dbgpt_server`
```{note}
* `python dbgpt_server --light` dbgpt_server does not start the llm service. Users can deploy the llm service separately by using `python llmserver`, and dbgpt_server accesses the llm service through set the LLM_SERVER environment variable in .env. The purpose is to allow for the separate deployment of dbgpt's backend service and llm service.

@@ -35,7 +35,7 @@ python pilot/server/dbgpt_server.py --light
```

##### Q3 How to use MultiGPUs
##### Q3: How to use MultiGPUs

DB-GPT will use all available gpu by default. And you can modify the setting `CUDA_VISIBLE_DEVICES=0,1` in `.env` file
to use the specific gpu IDs.

@@ -52,7 +52,7 @@ CUDA_VISIBLE_DEVICES=3,4,5,6 python3 pilot/server/dbgpt_server.py

You can modify the setting `MAX_GPU_MEMORY=xxGib` in `.env` file to configure the maximum memory used by each GPU.

##### Q4 Not Enough Memory
##### Q4: Not Enough Memory

DB-GPT supported 8-bit quantization and 4-bit quantization.

@@ -60,9 +60,9 @@ You can modify the setting `QUANTIZE_8bit=True` or `QUANTIZE_4bit=True` in `.env

Llama-2-70b with 8-bit quantization can run with 80 GB of VRAM, and 4-bit quantization can run with 48 GB of VRAM.

Note: you need to install the latest dependencies according to [requirements.txt](https://github.com/eosphoros-ai/DB-GPT/blob/main/requirements.txt).
Note: you need to install the quantization dependencies with `pip install -e ".[quantization]"`

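For orientation only, 8-bit or 4-bit loading with Hugging Face transformers plus bitsandbytes typically looks like the sketch below. This illustrates the general technique the quantization settings rely on, not DB-GPT's exact code path; the model path is an assumption.

```python
# Minimal sketch of quantized loading with transformers + bitsandbytes (assumed model path).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "/app/models/vicuna-13b-v1.5"  # assumption for illustration
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    load_in_8bit=True,   # 8-bit quantization; use load_in_4bit=True for 4-bit instead
    device_map="auto",   # spread layers across the available GPUs
)
```
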
##### Q5 How to Add LLM Service dynamic local mode
##### Q5: How to Add LLM Service dynamic local mode

Now DB-GPT through multi-llm service switch, so how to add llm service dynamic,

@@ -75,7 +75,7 @@ eg: dbgpt model start --model_name chatglm2-6b --model_path /root/DB-GPT/models/
chatgpt
eg: dbgpt model start --model_name chatgpt_proxyllm --model_path chatgpt_proxyllm --proxy_api_key ${OPENAI_KEY} --proxy_server_url {OPENAI_URL}
```
##### Q6 How to Add LLM Service dynamic in remote mode
##### Q6: How to Add LLM Service dynamic in remote mode
If you deploy llm service in remote machine instance, and you want to add model service to dbgpt server to manage

use dbgpt start worker and set --controller_addr.

@@ -88,13 +88,13 @@ eg: dbgpt start worker --model_name vicuna-13b-v1.5 \

```

##### Q7 dbgpt command not found
##### Q7: dbgpt command not found

```commandline
pip install -e ".[default]"
```

##### Q8 When starting the worker_manager on a cloud server and registering it with the controller, it is noticed that the worker's exposed IP is a private IP instead of a public IP, which leads to the inability to access the service.
##### Q8: When starting the worker_manager on a cloud server and registering it with the controller, it is noticed that the worker's exposed IP is a private IP instead of a public IP, which leads to the inability to access the service.

```commandline

@@ -103,4 +103,14 @@ pip install -e ".[default]"
automatically determined
```

##### Q9: How to customize model path and prompt template

DB-GPT will read the model path from `pilot.configs.model_config.LLM_MODEL_CONFIG` based on the `LLM_MODEL`.
Of course, you can use the environment variable `LLM_MODEL_PATH` to specify the model path and `LLM_PROMPT_TEMPLATE` to specify your model prompt template.

```
LLM_MODEL=vicuna-13b-v1.5
LLM_MODEL_PATH=/app/models/vicuna-13b-v1.5
# LLM_PROMPT_TEMPLATE=vicuna_v1.1
```

@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-10-20 22:29+0800\n"
"POT-Creation-Date: 2023-10-30 11:37+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"

@@ -19,34 +19,36 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"

#: ../../getting_started/faq/llm/llm_faq.md:1 54763acec7da4deb90669195c54ec3a1
#: ../../getting_started/faq/llm/llm_faq.md:1 98e23f85313c45169ff2ba7f80193356
msgid "LLM USE FAQ"
msgstr "LLM模型使用FAQ"

#: ../../getting_started/faq/llm/llm_faq.md:3 66f73fd2ee7b462e92d3f263792a5e33
msgid "Q1:how to use openai chatgpt service"
#: ../../getting_started/faq/llm/llm_faq.md:3 0d49acfb4af947cb969b249346b00d33
#, fuzzy
msgid "Q1: how to use openai chatgpt service"
msgstr "我怎么使用OPENAI服务"

#: ../../getting_started/faq/llm/llm_faq.md:4 9d178d8462b74cb188bbacf2ac2ac12b
#: ../../getting_started/faq/llm/llm_faq.md:4 7010fec33e264987a29de86c54da93e8
#, fuzzy
msgid "change your LLM_MODEL in `.env`"
msgstr "通过在.env文件设置LLM_MODEL"

#: ../../getting_started/faq/llm/llm_faq.md:9 f7ca82f257be4ac09639a7f8af5e83eb
#: ../../getting_started/faq/llm/llm_faq.md:9 0982d6d5d0b3434fb00698aaf675f3f3
msgid "set your OPENAPI KEY"
msgstr "set your OPENAPI KEY"

#: ../../getting_started/faq/llm/llm_faq.md:16 d6255b20dce34a2690df7e2af3505d97
#: ../../getting_started/faq/llm/llm_faq.md:16 63650494c1574de09c007e1d470dd53d
msgid "make sure your openapi API_KEY is available"
msgstr "确认openapi API_KEY是否可用"

#: ../../getting_started/faq/llm/llm_faq.md:18 6f1c6dbdb31f4210a6d21f0f3a6ae589
#: ../../getting_started/faq/llm/llm_faq.md:18 5721ec71e344499d96c55b7e531d7c08
#, fuzzy
msgid ""
"Q2 What difference between `python dbgpt_server --light` and `python "
"Q2: What difference between `python dbgpt_server --light` and `python "
"dbgpt_server`"
msgstr "Q2 `python dbgpt_server --light` 和 `python dbgpt_server`的区别是什么?"
msgstr "Q2: `python dbgpt_server --light` 和 `python dbgpt_server`的区别是什么?"

#: ../../getting_started/faq/llm/llm_faq.md:20 b839771ae9e34e998b0edf8d69deabdd
#: ../../getting_started/faq/llm/llm_faq.md:20 76a650f195dd40b6a3a3564030cdc040
msgid ""
"`python dbgpt_server --light` dbgpt_server does not start the llm "
"service. Users can deploy the llm service separately by using `python "

@@ -58,75 +60,75 @@ msgstr ""
"用户可以通过`python "
"llmserver`单独部署模型服务,dbgpt_server通过LLM_SERVER环境变量来访问模型服务。目的是为了可以将dbgpt后台服务和大模型服务分离部署。"

#: ../../getting_started/faq/llm/llm_faq.md:22 aba39cef6fe84799bcd03e8f36c41296
#: ../../getting_started/faq/llm/llm_faq.md:22 8cd87e3504784d9e891e1fb96c79e143
msgid ""
"`python dbgpt_server` dbgpt_server service and the llm service are "
"deployed on the same instance. when dbgpt_server starts the service, it "
"also starts the llm service at the same time."
msgstr "`python dbgpt_server` 是将后台服务和模型服务部署在同一台实例上.dbgpt_server在启动服务的时候同时开启模型服务."

#: ../../getting_started/faq/llm/llm_faq.md:27 c65270d479af49e28e99b35a7932adbd
#: ../../getting_started/faq/llm/llm_faq.md:27 58a6eaf57e6d425685f67058b1a642d4
msgid ""
"If you want to access an external LLM service(deployed by DB-GPT), you "
"need to"
msgstr "如果模型服务部署(通过DB-GPT部署)在别的机器,想通过dbgpt服务访问模型服务"

#: ../../getting_started/faq/llm/llm_faq.md:29 da153e6d18c543f28e0c4e85618e3d3d
#: ../../getting_started/faq/llm/llm_faq.md:29 67ac8823ca2e49ba9c833368e2cfb53c
msgid ""
"1.set the variables LLM_MODEL=YOUR_MODEL_NAME, "
"MODEL_SERVER=YOUR_MODEL_SERVER(eg:http://localhost:5000) in the .env "
"file."
msgstr ""

#: ../../getting_started/faq/llm/llm_faq.md:31 cd89b8a2075f4407b8036a74151a6377
#: ../../getting_started/faq/llm/llm_faq.md:31 e5c066bcdf0649a1b33bbfc7fd3b1a66
msgid "2.execute dbgpt_server.py in light mode"
msgstr "2.execute dbgpt_server.py light 模式"

#: ../../getting_started/faq/llm/llm_faq.md:33 8f4b9401ac4f4a25a7479bee9ef5e8c1
#: ../../getting_started/faq/llm/llm_faq.md:33 402ff01d7ee94d97be4a0eb964e39b97
msgid "python pilot/server/dbgpt_server.py --light"
msgstr ""

#: ../../getting_started/faq/llm/llm_faq.md:38 69e1064cd7554ce6b49da732f800eacc
#: ../../getting_started/faq/llm/llm_faq.md:38 86190c689d8f4d9a9b58d904e0b5867b
#, fuzzy
msgid "Q3 How to use MultiGPUs"
msgstr "Q2 怎么使用 MultiGPUs"
msgid "Q3: How to use MultiGPUs"
msgstr "Q3: 怎么使用 MultiGPUs"

#: ../../getting_started/faq/llm/llm_faq.md:40 6de3f105ce96430db5756f38bbd9ca12
#: ../../getting_started/faq/llm/llm_faq.md:40 6b08cff88750440b98956203d8b8a084
msgid ""
"DB-GPT will use all available gpu by default. And you can modify the "
"setting `CUDA_VISIBLE_DEVICES=0,1` in `.env` file to use the specific gpu"
" IDs."
msgstr "DB-GPT默认加载可利用的gpu,你也可以通过修改 在`.env`文件 `CUDA_VISIBLE_DEVICES=0,1`来指定gpu IDs"

#: ../../getting_started/faq/llm/llm_faq.md:43 87cb9bfb20af4b259d719df797c42a7d
#: ../../getting_started/faq/llm/llm_faq.md:43 93b39089e5be4475b9e90e7813f5a7d9
msgid ""
"Optionally, you can also specify the gpu ID to use before the starting "
"command, as shown below:"
msgstr "你也可以指定gpu ID启动"

#: ../../getting_started/faq/llm/llm_faq.md:53 bcfa35cda6304ee5ab9a775a2d4eda63
#: ../../getting_started/faq/llm/llm_faq.md:53 62e3074c109d401fa4bf1ddbdc6c7be1
msgid ""
"You can modify the setting `MAX_GPU_MEMORY=xxGib` in `.env` file to "
"configure the maximum memory used by each GPU."
msgstr "同时你可以通过在.env文件设置`MAX_GPU_MEMORY=xxGib`修改每个GPU的最大使用内存"

#: ../../getting_started/faq/llm/llm_faq.md:55 a05c5484927844c8bb4791f0a9ccc82e
#: ../../getting_started/faq/llm/llm_faq.md:55 d235bd83545c476f8e12572658d1c723
#, fuzzy
msgid "Q4 Not Enough Memory"
msgstr "Q3 机器显存不够 "
msgid "Q4: Not Enough Memory"
msgstr "Q4: 机器显存不够 "

#: ../../getting_started/faq/llm/llm_faq.md:57 fe17a023b6eb4a92b1b927e1b94e3784
#: ../../getting_started/faq/llm/llm_faq.md:57 b3243ed9147f42bba987d7f9b778e66f
msgid "DB-GPT supported 8-bit quantization and 4-bit quantization."
msgstr "DB-GPT 支持 8-bit quantization 和 4-bit quantization."

#: ../../getting_started/faq/llm/llm_faq.md:59 76c3684c10864b8e87e5c2255b6c0b7f
#: ../../getting_started/faq/llm/llm_faq.md:59 1ddb9f94ab994bfebfee46d1c19888d4
msgid ""
"You can modify the setting `QUANTIZE_8bit=True` or `QUANTIZE_4bit=True` "
"in `.env` file to use quantization(8-bit quantization is enabled by "
"default)."
msgstr "你可以通过在.env文件设置`QUANTIZE_8bit=True` or `QUANTIZE_4bit=True`"

#: ../../getting_started/faq/llm/llm_faq.md:61 c5d849a38f1a4f0687bbcffb6699dc39
#: ../../getting_started/faq/llm/llm_faq.md:61 54b85daa3fb24b17b67a6da31d2be8b0
msgid ""
"Llama-2-70b with 8-bit quantization can run with 80 GB of VRAM, and 4-bit"
" quantization can run with 48 GB of VRAM."

@@ -134,49 +136,77 @@ msgstr ""
"Llama-2-70b with 8-bit quantization 可以运行在 80 GB VRAM机器, 4-bit "
"quantization可以运行在 48 GB VRAM"

#: ../../getting_started/faq/llm/llm_faq.md:63 867329a5e3b0403083e96f72b8747fb2
#: ../../getting_started/faq/llm/llm_faq.md:63 097d680aed184fee9eceebee55a47ac1
msgid ""
"Note: you need to install the latest dependencies according to "
"[requirements.txt](https://github.com/eosphoros-ai/DB-"
"GPT/blob/main/requirements.txt)."
"Note: you need to install the quantization dependencies with `pip install"
" -e \".[quantization]\"`"
msgstr ""

#: ../../getting_started/faq/llm/llm_faq.md:65 60ceee25e9fb4ddba40c5306bfb0a82f
#: ../../getting_started/faq/llm/llm_faq.md:65 f3a51056043c49eb84471040f2b364aa
#, fuzzy
msgid "Q5 How to Add LLM Service dynamic local mode"
msgstr "Q5 怎样动态新增模型服务"
msgid "Q5: How to Add LLM Service dynamic local mode"
msgstr "Q5: 怎样动态新增模型服务"

#: ../../getting_started/faq/llm/llm_faq.md:67 c99eb7f7ae844884a8f0da94238ea7e0
#: ../../getting_started/faq/llm/llm_faq.md:67 43ee6b0f23814c94a4ddb2429801a5e1
msgid ""
"Now DB-GPT through multi-llm service switch, so how to add llm service "
"dynamic,"
msgstr "DB-GPT支持多个模型服务切换, 怎样添加一个模型服务呢"

#: ../../getting_started/faq/llm/llm_faq.md:78 cd89b8a2075f4407b8036a74151a6377
#: ../../getting_started/faq/llm/llm_faq.md:78 c217bbf0d2b6425fa7a1c691b7704a8d
#, fuzzy
msgid "Q6 How to Add LLM Service dynamic in remote mode"
msgstr "Q5 怎样动态新增模型服务"
msgid "Q6: How to Add LLM Service dynamic in remote mode"
msgstr "Q6: 怎样动态新增模型服务"

#: ../../getting_started/faq/llm/llm_faq.md:79 8833ce89465848259b08ef0a4fa68d96
#: ../../getting_started/faq/llm/llm_faq.md:79 195bdaa937a94c7aa0d8c6e1a5430d6e
msgid ""
"If you deploy llm service in remote machine instance, and you want to "
"add model service to dbgpt server to manage"
msgstr "如果你想在远程机器实例部署大模型服务并添加到本地dbgpt_server进行管理"

#: ../../getting_started/faq/llm/llm_faq.md:81 992eb37e3cca48829636c15ba3ec2ee8
#: ../../getting_started/faq/llm/llm_faq.md:81 c64098b838a94821963a1d16e56497ff
msgid "use dbgpt start worker and set --controller_addr."
msgstr "使用1`dbgpt start worker`命令并设置注册地址--controller_addr"

#: ../../getting_started/faq/llm/llm_faq.md:91 0d06d7d6dd3d4780894ecd914c89b5a2
#: ../../getting_started/faq/llm/llm_faq.md:91 cb12d5e9d9d24f14abc3ebea877a4b24
#, fuzzy
msgid "Q7 dbgpt command not found"
msgstr "Q6 dbgpt command not found"
msgid "Q7: dbgpt command not found"
msgstr "Q7: dbgpt command not found"

#: ../../getting_started/faq/llm/llm_faq.md:97 5d9beed0d95a4503a43d0e025664273b
#: ../../getting_started/faq/llm/llm_faq.md:97 f95cdccfa82d4b3eb2a23dd297131faa
#, fuzzy
msgid ""
"Q8 When starting the worker_manager on a cloud server and registering it "
"with the controller, it is noticed that the worker's exposed IP is a "
"Q8: When starting the worker_manager on a cloud server and registering it"
" with the controller, it is noticed that the worker's exposed IP is a "
"private IP instead of a public IP, which leads to the inability to access"
" the service."
msgstr "云服务器启动worker_manager注册到controller时,发现worker暴露的ip是私网ip, 没有以公网ip暴露,导致服务访问不到"
msgstr ""
"Q8: 云服务器启动worker_manager注册到controller时,发现worker暴露的ip是私网ip, "
"没有以公网ip暴露,导致服务访问不到"

#: ../../getting_started/faq/llm/llm_faq.md:106
#: 739a2983f3484acf98e877dc12f4ccda
msgid "Q9: How to customize model path and prompt template"
msgstr "Q9: 如何自定义模型路径和 prompt 模板"

#: ../../getting_started/faq/llm/llm_faq.md:108
#: 8b82a33a311649c7850c30c00c987c72
#, fuzzy
msgid ""
"DB-GPT will read the model path from "
"`pilot.configs.model_config.LLM_MODEL_CONFIG` based on the `LLM_MODEL`. "
"Of course, you can use the environment variable `LLM_MODEL_PATH` to "
"specify the model path and `LLM_PROMPT_TEMPLATE` to specify your model "
"prompt template."
msgstr ""
"DB-GPT 会根据 `LLM_MODEL` 从 `pilot.configs.model_config.LLM_MODEL_CONFIG` "
"中读取模型路径。当然,你可以使用环境 `LLM_MODEL_PATH` 来指定模型路径,以及使用 `LLM_PROMPT_TEMPLATE` "
"来指定模型的 prompt 模板。"

#~ msgid ""
#~ "Note: you need to install the "
#~ "latest dependencies according to "
#~ "[requirements.txt](https://github.com/eosphoros-ai/DB-"
#~ "GPT/blob/main/requirements.txt)."
#~ msgstr ""

@@ -194,6 +194,8 @@ class Config(metaclass=Singleton):

### LLM Model Service Configuration
self.LLM_MODEL = os.getenv("LLM_MODEL", "vicuna-13b-v1.5")
self.LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH")

### Proxy llm backend, this configuration is only valid when "LLM_MODEL=proxyllm"
### When we use the rest API provided by deployment frameworks like fastchat as a proxyllm, "PROXYLLM_BACKEND" is the model they actually deploy.
### We need to use "PROXYLLM_BACKEND" to load the prompt of the corresponding scene.

@@ -13,7 +13,7 @@ from pilot.utils.api_utils import (
_api_remote as api_remote,
_sync_api_remote as sync_api_remote,
)
from pilot.utils.utils import setup_logging
from pilot.utils.utils import setup_logging, setup_http_service_logging

logger = logging.getLogger(__name__)

@@ -149,6 +149,7 @@ def initialize_controller(
else:
import uvicorn

setup_http_service_logging()
app = FastAPI()
app.include_router(router, prefix="/api", tags=["Model"])
uvicorn.run(app, host=host, port=port, log_level="info")

@@ -179,7 +180,8 @@ def run_model_controller():
parser = EnvArgumentParser()
env_prefix = "controller_"
controller_params: ModelControllerParameters = parser.parse_args_into_dataclass(
ModelControllerParameters, env_prefix=env_prefix
ModelControllerParameters,
env_prefixes=[env_prefix],
)

setup_logging(

@@ -76,7 +76,7 @@ class DefaultModelWorker(ModelWorker):
model_type = self.llm_adapter.model_type()
model_params: ModelParameters = model_args.parse_args_into_dataclass(
param_cls,
env_prefix=env_prefix,
env_prefixes=[env_prefix, "LLM_"],
command_args=command_args,
model_name=self.model_name,
model_path=self.model_path,

@@ -106,7 +106,7 @@ def _parse_embedding_params(
env_prefix = EnvArgumentParser.get_env_prefix(model_name)
model_params: BaseEmbeddingModelParameters = model_args.parse_args_into_dataclass(
param_cls,
env_prefix=env_prefix,
env_prefixes=[env_prefix],
command_args=command_args,
model_name=model_name,
model_path=model_path,

@@ -38,7 +38,7 @@ from pilot.utils.parameter_utils import (
_dict_to_command_args,
_get_dict_from_obj,
)
from pilot.utils.utils import setup_logging
from pilot.utils.utils import setup_logging, setup_http_service_logging
from pilot.utils.tracer import initialize_tracer, root_tracer, SpanType, SpanTypeRunName
from pilot.utils.system_utils import get_system_info

@@ -735,6 +735,8 @@ def _setup_fastapi(
):
if not app:
app = FastAPI()
setup_http_service_logging()

if worker_params.standalone:
from pilot.model.cluster.controller.controller import initialize_controller
from pilot.model.cluster.controller.controller import (

@@ -781,7 +783,7 @@ def _parse_worker_params(
env_prefix = EnvArgumentParser.get_env_prefix(model_name)
worker_params: ModelWorkerParameters = worker_args.parse_args_into_dataclass(
ModelWorkerParameters,
env_prefix=env_prefix,
env_prefixes=[env_prefix],
model_name=model_name,
model_path=model_path,
**kwargs,

@@ -790,7 +792,7 @@ def _parse_worker_params(
# Read parameters again with prefix of model name.
new_worker_params = worker_args.parse_args_into_dataclass(
ModelWorkerParameters,
env_prefix=env_prefix,
env_prefixes=[env_prefix],
model_name=worker_params.model_name,
model_path=worker_params.model_path,
**kwargs,

@@ -95,7 +95,7 @@ class ModelLoader:
env_prefix = env_prefix.replace("-", "_")
model_params = args_parser.parse_args_into_dataclass(
param_cls,
env_prefix=env_prefix,
env_prefixes=[env_prefix],
device=self.device,
model_path=self.model_path,
model_name=self.model_name,

@@ -445,17 +445,47 @@ class VLLMModelAdaperWrapper(LLMModelAdaper):

# Covering the configuration of fastchat, we will regularly feed back the code here to fastchat.
# We also recommend that you modify it directly in the fastchat repository.

# source: https://huggingface.co/BAAI/AquilaChat2-34B/blob/4608b75855334b93329a771aee03869dbf7d88cc/predict.py#L212
register_conv_template(
Conversation(
name="internlm-chat",
system_message="A chat between a curious <|User|> and an <|Bot|>. The <|Bot|> gives helpful, detailed, and polite answers to the <|User|>'s questions.\n\n",
roles=("<|User|>", "<|Bot|>"),
sep_style=SeparatorStyle.CHATINTERN,
sep="<eoh>",
sep2="<eoa>",
stop_token_ids=[1, 103028],
# TODO feedback stop_str to fastchat
stop_str="<eoa>",
),
override=True,
name="aquila-legacy",
system_message="A chat between a curious human and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n",
roles=("### Human: ", "### Assistant: ", "System"),
messages=(),
offset=0,
sep_style=SeparatorStyle.NO_COLON_TWO,
sep="\n",
sep2="</s>",
stop_str=["</s>", "[UNK]"],
)
)
# source: https://huggingface.co/BAAI/AquilaChat2-34B/blob/4608b75855334b93329a771aee03869dbf7d88cc/predict.py#L227
register_conv_template(
Conversation(
name="aquila",
system_message="A chat between a curious human and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the human's questions.",
roles=("Human", "Assistant", "System"),
messages=(),
offset=0,
sep_style=SeparatorStyle.ADD_COLON_TWO,
sep="###",
sep2="</s>",
stop_str=["</s>", "[UNK]"],
)
)
# source: https://huggingface.co/BAAI/AquilaChat2-34B/blob/4608b75855334b93329a771aee03869dbf7d88cc/predict.py#L242
register_conv_template(
Conversation(
name="aquila-v1",
roles=("<|startofpiece|>", "<|endofpiece|>", ""),
messages=(),
offset=0,
sep_style=SeparatorStyle.NO_COLON_TWO,
sep="",
sep2="</s>",
stop_str=["</s>", "<|endoftext|>"],
)
)

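For context, the "aquila" template above uses fastchat's ADD_COLON_TWO separator style: the system message comes first, then each role is joined to its message with a colon, alternating `sep` and `sep2` between turns. The sketch below is a hedged, standalone illustration of that rendering convention; it is not DB-GPT's or fastchat's actual code, and the sample turns are assumptions.

```python
# Hedged illustration of ADD_COLON_TWO-style rendering for the "aquila" template.
def render_aquila_prompt(system_message, turns, sep="###", sep2="</s>"):
    seps = [sep, sep2]
    ret = system_message + seps[0]
    for i, (role, message) in enumerate(turns):
        if message is None:          # open slot where the assistant should generate
            ret += role + ":"
        else:
            ret += role + ": " + message + seps[i % 2]
    return ret


prompt = render_aquila_prompt(
    "A chat between a curious human and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the human's questions.",
    [("Human", "What is DB-GPT?"), ("Assistant", None)],
)
print(prompt)
```
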
@@ -5,8 +5,6 @@ import os
from typing import List
import logging

import openai

from pilot.model.proxy.llms.proxy_model import ProxyModel
from pilot.model.parameter import ProxyModelParameters
from pilot.scene.base_message import ModelMessage, ModelMessageRoleType

@@ -15,6 +13,14 @@ logger = logging.getLogger(__name__)

def _initialize_openai(params: ProxyModelParameters):
try:
import openai
except ImportError as exc:
raise ValueError(
"Could not import python package: openai "
"Please install openai by command `pip install openai` "
) from exc

api_type = params.proxy_api_type or os.getenv("OPENAI_API_TYPE", "open_ai")

api_base = params.proxy_api_base or os.getenv(

@@ -106,6 +112,8 @@ def _build_request(model: ProxyModel, params):
def chatgpt_generate_stream(
model: ProxyModel, tokenizer, params, device, context_len=2048
):
import openai

history, payloads = _build_request(model, params)

res = openai.ChatCompletion.create(messages=history, **payloads)

@@ -121,6 +129,8 @@ def chatgpt_generate_stream(
async def async_chatgpt_generate_stream(
model: ProxyModel, tokenizer, params, device, context_len=2048
):
import openai

history, payloads = _build_request(model, params)

res = await openai.ChatCompletion.acreate(messages=history, **payloads)

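For reference, consuming a streamed ChatCompletion with the pre-1.0 openai Python SDK (the API style used above) generally looks like the sketch below. The model name, messages, and key handling are assumptions for illustration; `_build_request`'s actual payload is not shown here.

```python
# Hedged sketch: streaming with the openai<1.0 SDK, matching the ChatCompletion API used above.
import openai

openai.api_key = "sk-..."  # assumption: in DB-GPT this would come from the proxy parameters

res = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",                              # assumed model name
    messages=[{"role": "user", "content": "Hello"}],    # assumed message history
    stream=True,
)

text = ""
for chunk in res:
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        text += delta["content"]
print(text)
```
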
@@ -2,6 +2,7 @@ import os
import argparse
import sys
from typing import List
import logging

ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(ROOT_PATH)

@@ -39,6 +40,7 @@ from pilot.utils.utils import (
setup_logging,
_get_logging_level,
logging_str_to_uvicorn_level,
setup_http_service_logging,
)
from pilot.utils.tracer import root_tracer, initialize_tracer, SpanType, SpanTypeRunName
from pilot.utils.parameter_utils import _get_dict_from_obj

@@ -127,6 +129,7 @@ def initialize_app(param: WebWerverParameters = None, args: List[str] = None):
setup_logging(
"pilot", logging_level=param.log_level, logger_filename=param.log_file
)

# Before start
system_app.before_start()

@@ -141,7 +144,7 @@ def initialize_app(param: WebWerverParameters = None, args: List[str] = None):

model_name = param.model_name or CFG.LLM_MODEL

model_path = LLM_MODEL_CONFIG.get(model_name)
model_path = CFG.LLM_MODEL_PATH or LLM_MODEL_CONFIG.get(model_name)
if not param.light:
print("Model Unified Deployment Mode!")
if not param.remote_embedding:

@@ -180,6 +183,7 @@ def initialize_app(param: WebWerverParameters = None, args: List[str] = None):
def run_uvicorn(param: WebWerverParameters):
import uvicorn

setup_http_service_logging()
uvicorn.run(
app,
host=param.host,

@@ -190,6 +190,17 @@ def _genenv_ignoring_key_case(env_key: str, env_prefix: str = None, default_valu
)


def _genenv_ignoring_key_case_with_prefixes(
env_key: str, env_prefixes: List[str] = None, default_value=None
) -> str:
if env_prefixes:
for env_prefix in env_prefixes:
env_var_value = _genenv_ignoring_key_case(env_key, env_prefix)
if env_var_value:
return env_var_value
return _genenv_ignoring_key_case(env_key, default_value=default_value)


class EnvArgumentParser:
@staticmethod
def get_env_prefix(env_key: str) -> str:

@@ -201,18 +212,16 @@ class EnvArgumentParser:
def parse_args_into_dataclass(
self,
dataclass_type: Type,
env_prefix: str = None,
env_prefixes: List[str] = None,
command_args: List[str] = None,
**kwargs,
) -> Any:
"""Parse parameters from environment variables and command lines and populate them into data class"""
parser = argparse.ArgumentParser()
for field in fields(dataclass_type):
env_var_value = _genenv_ignoring_key_case(field.name, env_prefix)
if not env_var_value:
# Read without env prefix
env_var_value = _genenv_ignoring_key_case(field.name)

env_var_value = _genenv_ignoring_key_case_with_prefixes(
field.name, env_prefixes
)
if env_var_value:
env_var_value = env_var_value.strip()
if field.type is int or field.type == Optional[int]:

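The new `env_prefixes` parameter lets a dataclass field be resolved from several prefixed environment variables in order, falling back to the bare field name. A self-contained sketch of that lookup behaviour (not importing DB-GPT itself; the inner case-insensitive helper and the variable names are assumptions):

```python
import os
from typing import List, Optional


def genenv_ignoring_key_case(env_key: str, env_prefix: str = None, default_value=None):
    """Look up env_key (optionally prefixed), trying upper- and lower-case forms."""
    if env_prefix:
        env_key = env_prefix + env_key
    return os.getenv(env_key.upper(), os.getenv(env_key.lower(), default_value))


def genenv_with_prefixes(env_key: str, env_prefixes: Optional[List[str]] = None, default_value=None):
    """First matching prefixed variable wins; otherwise fall back to the bare key."""
    for prefix in env_prefixes or []:
        value = genenv_ignoring_key_case(env_key, prefix)
        if value:
            return value
    return genenv_ignoring_key_case(env_key, default_value=default_value)


os.environ["VICUNA_13B_V1_5_MODEL_PATH"] = "/data/models/vicuna-13b-v1.5"
os.environ["LLM_MODEL_PATH"] = "/data/models/default"
# A model-specific prefix beats the generic "LLM_" fallback:
print(genenv_with_prefixes("model_path", ["vicuna_13b_v1_5_", "LLM_"]))
```
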
@@ -3,6 +3,8 @@

import logging
import logging.handlers
from typing import Any, List

import os
import sys
import asyncio

@@ -184,3 +186,42 @@ def logging_str_to_uvicorn_level(log_level_str):
"NOTSET": "info",
}
return level_str_mapping.get(log_level_str.upper(), "info")


class EndpointFilter(logging.Filter):
"""Disable access log on certain endpoint

source: https://github.com/encode/starlette/issues/864#issuecomment-1254987630
"""

def __init__(
self,
path: str,
*args: Any,
**kwargs: Any,
):
super().__init__(*args, **kwargs)
self._path = path

def filter(self, record: logging.LogRecord) -> bool:
return record.getMessage().find(self._path) == -1


def setup_http_service_logging(exclude_paths: List[str] = None):
"""Setup http service logging

Now just disable some logs

Args:
exclude_paths (List[str]): The paths to disable log
"""
if not exclude_paths:
# Not show heartbeat log
exclude_paths = ["/api/controller/heartbeat"]
uvicorn_logger = logging.getLogger("uvicorn.access")
if uvicorn_logger:
for path in exclude_paths:
uvicorn_logger.addFilter(EndpointFilter(path=path))
httpx_logger = logging.getLogger("httpx")
if httpx_logger:
httpx_logger.setLevel(logging.WARNING)

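As a quick standalone illustration of the filter above (not part of the commit): records whose message contains the excluded path are dropped by the uvicorn access logger, so heartbeat polling no longer floods the logs while other requests are still logged.

```python
# Standalone sketch: suppress access-log lines for a noisy endpoint.
import logging


class EndpointFilter(logging.Filter):
    def __init__(self, path: str):
        super().__init__()
        self._path = path

    def filter(self, record: logging.LogRecord) -> bool:
        # Keep the record only if the excluded path does not appear in the message.
        return record.getMessage().find(self._path) == -1


logging.basicConfig(level=logging.INFO)
access_logger = logging.getLogger("uvicorn.access")
access_logger.addFilter(EndpointFilter(path="/api/controller/heartbeat"))

access_logger.info("GET /api/controller/heartbeat 200")  # filtered out
access_logger.info("GET /api/chat 200")                  # still logged
```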