fix(model): Fix apiserver error (#2605)

This commit is contained in:
Fangyin Cheng
2025-04-10 10:23:49 +08:00
committed by GitHub
parent babc484261
commit 5ddd9e5bf8
39 changed files with 1300 additions and 243 deletions

View File

@@ -22,6 +22,7 @@ import TabItem from '@theme/TabItem';
values={[
{label: 'Curl', value: 'curl'},
{label: 'Python', value: 'python'},
{label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
]
}>
@@ -54,8 +55,40 @@ async for data in client.chat_stream(
print(data)
```
</TabItem>
<TabItem value="openai-sdk">
```python
from openai import OpenAI
DBGPT_API_KEY = "dbgpt"
client = OpenAI(
api_key=DBGPT_API_KEY,
base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": "Hello",
},
],
extra_body={
"chat_mode": "chat_normal",
},
stream=True,
max_tokens=2048,
)
for chunk in response:
delta_content = chunk.choices[0].delta.content
print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>
### Chat Completion Stream Response
```commandline
data: {"id": "chatcmpl-ba6fb52e-e5b2-11ee-b031-acde48001122", "model": "gpt-4o", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Hello"}}]}
@@ -110,6 +143,8 @@ from dbgpt_client import Client
DBGPT_API_KEY = "dbgpt"
client = Client(api_key=DBGPT_API_KEY)
response = await client.chat(model="gpt-4o", messages="hello")
print(response)
await client.aclose()
```
</TabItem>
</Tabs>

View File

@@ -21,6 +21,7 @@ import TabItem from '@theme/TabItem';
values={[
{label: 'Curl', value: 'curl'},
{label: 'Python', value: 'python'},
{label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
]
}>
@@ -56,6 +57,40 @@ res = client.chat(
)
```
</TabItem>
<TabItem value="openai-sdk">
```python
from openai import OpenAI
DBGPT_API_KEY = "dbgpt"
DB_NAME="{your_db_name}"
client = OpenAI(
api_key=DBGPT_API_KEY,
base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": "Hello",
},
],
extra_body={
"chat_mode": "chat_data",
"chat_param": DB_NAME,
},
stream=True,
max_tokens=2048,
)
for chunk in response:
delta_content = chunk.choices[0].delta.content
print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>
#### Chat Completion Response

View File

@@ -21,6 +21,7 @@ import TabItem from '@theme/TabItem';
values={[
{label: 'Curl', value: 'curl'},
{label: 'Python', value: 'python'},
{label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
]
}>
@@ -34,7 +35,7 @@ curl -X POST "http://localhost:5670/api/v2/chat/completions" \
-H "Authorization: Bearer $DBGPT_API_KEY" \
-H "accept: application/json" \
-H "Content-Type: application/json" \
-d "{\"messages\":\"Hello\",\"model\":\"chatgpt_proxyllm\", \"chat_mode\": \"chat_flow\", \"chat_param\": \"$FLOW_ID\"}"
-d "{\"messages\":\"Hello\",\"model\":\"gpt-4o\", \"chat_mode\": \"chat_flow\", \"chat_param\": \"$FLOW_ID\"}"
```
</TabItem>
@@ -50,18 +51,53 @@ FLOW_ID="{YOUR_FLOW_ID}"
client = Client(api_key=DBGPT_API_KEY)
async for data in client.chat_stream(
messages="Introduce AWEL",
model="chatgpt_proxyllm",
model="gpt-4o",
chat_mode="chat_flow",
chat_param=FLOW_ID
):
print(data)
```
</TabItem>
<TabItem value="openai-sdk">
```python
from openai import OpenAI
DBGPT_API_KEY = "dbgpt"
FLOW_ID="{YOUR_FLOW_ID}"
client = OpenAI(
api_key=DBGPT_API_KEY,
base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": "Hello",
},
],
extra_body={
"chat_mode": "chat_flow",
"chat_param": FLOW_ID,
},
stream=True,
max_tokens=2048,
)
for chunk in response:
delta_content = chunk.choices[0].delta.content
print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>
#### Chat Completion Stream Response
```commandline
data: {"id": "579f8862-fc4b-481e-af02-a127e6d036c8", "created": 1710918094, "model": "chatgpt_proxyllm", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "\n\n"}}]}
data: {"id": "579f8862-fc4b-481e-af02-a127e6d036c8", "created": 1710918094, "model": "gpt-4o", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "\n\n"}}]}
```
### Create Flow

View File

@@ -2,7 +2,7 @@
This is the introduction to the DB-GPT API documentation. You can interact with the API through HTTP requests from any language, or via our official Python client bindings.
# Authentication
## Authentication
The DB-GPT API uses API keys for authentication. Visit your API Keys page to retrieve the API key you'll use in your requests.
Production requests must be routed through your own backend server where your API key can be securely loaded from an environment variable or key management service.
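For example, a request can read the key from an environment variable and pass it in the `Authorization: Bearer` header. A minimal sketch, assuming the default local deployment at `http://localhost:5670`, the example `dbgpt` key, and a non-streaming response (the payload format follows the curl examples in the chat documents):
```python
import os

import requests

# Load the API key from the environment instead of hard-coding it
api_key = os.environ.get("DBGPT_API_KEY", "dbgpt")

response = requests.post(
    "http://localhost:5670/api/v2/chat/completions",
    headers={
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    },
    # Payload format follows the curl examples in the chat documents
    json={"model": "gpt-4o", "messages": "Hello", "chat_mode": "chat_normal"},
)
print(response.text)
```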
@@ -34,10 +34,18 @@ API_KEYS - The list of API keys that are allowed to access the API. Each of the
API_KEYS=dbgpt
```
## Installation
## Using the DB-GPT official Python Client
If you use Python, you should install the official DB-GPT Client package from PyPI:
```bash
pip install "dbgpt[client]>=0.5.2"
pip install "dbgpt-client>=0.7.1rc0"
```
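A minimal usage sketch (assuming a local DB-GPT server and the example `dbgpt` API key; the client is asynchronous, so it needs an event loop):
```python
import asyncio

from dbgpt_client import Client


async def main():
    # Assumes the default local deployment and the example "dbgpt" API key
    client = Client(api_key="dbgpt")
    response = await client.chat(model="gpt-4o", messages="hello")
    print(response)
    await client.aclose()


asyncio.run(main())
```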
## Using the OpenAI Python SDK
For some chat scenarios, you can use the OpenAI Python SDK to interact with the DB-GPT API, since the DB-GPT API is compatible with the OpenAI API.
```bash
pip install openai
```
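A minimal client setup sketch (assuming the default local deployment; the chat documents show complete streaming examples):
```python
from openai import OpenAI

# Point the OpenAI client at the DB-GPT OpenAI-compatible v2 API
client = OpenAI(api_key="dbgpt", base_url="http://localhost:5670/api/v2")
```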

View File

@@ -21,6 +21,7 @@ import TabItem from '@theme/TabItem';
values={[
{label: 'Curl', value: 'curl'},
{label: 'Python', value: 'python'},
{label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
]
}>
@@ -57,6 +58,41 @@ async for data in client.chat_stream(
print(data)
```
</TabItem>
<TabItem value="openai-sdk">
```python
from openai import OpenAI
DBGPT_API_KEY = "dbgpt"
SPACE_NAME="{YOUR_SPACE_NAME}"
client = OpenAI(
api_key=DBGPT_API_KEY,
base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": "Hello",
},
],
extra_body={
"chat_mode": "chat_knowledge",
"chat_param": SPACE_NAME,
},
stream=True,
max_tokens=2048,
)
for chunk in response:
delta_content = chunk.choices[0].delta.content
print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>
#### Chat Completion Response

View File

@@ -13,11 +13,13 @@ In the DB-GPT project, we defined a service-oriented multi-model management fram
```python
import openai
openai.api_key = "EMPTY"
openai.api_base = "http://127.0.0.1:8100/api/v1"
model = "vicuna-13b-v1.5"
model = "Qwen/QwQ-32B"
completion = openai.ChatCompletion.create(
client = openai.OpenAI(
api_key="EMPTY",
base_url="http://127.0.0.1:8100/api/v1",
)
completion = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "hello"}]
)

View File

@@ -6,13 +6,37 @@ which supports concurrent requests and continuous batching inference.
## Install dependencies
```bash
pip install -e ".[llama_cpp_server]"
```
If you want to accelerate the inference speed, and you have a GPU, you can install the following dependencies:
You can add the extra `--extra "llama_cpp_server"` to install the dependencies needed for the llama-cpp server.
If you have an NVIDIA GPU, you can enable CUDA support by setting the environment variable `CMAKE_ARGS="-DGGML_CUDA=ON"`.
```bash
CMAKE_ARGS="-DGGML_CUDA=ON" pip install -e ".[llama_cpp_server]"
# Use uv to install dependencies needed for llama-cpp
# Install core dependencies and select desired extensions
CMAKE_ARGS="-DGGML_CUDA=ON" uv sync --all-packages \
--extra "base" \
--extra "hf" \
--extra "cuda121" \
--extra "llama_cpp_server" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "quant_bnb" \
--extra "dbgpts"
```
Otherwise, run the following command to install dependencies without CUDA support.
```bash
# Use uv to install dependencies needed for llama-cpp
# Install core dependencies and select desired extensions
uv sync --all-packages \
--extra "base" \
--extra "hf" \
--extra "llama_cpp_server" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "quant_bnb" \
--extra "dbgpts"
```
## Download the model
@@ -25,16 +49,17 @@ wget https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5
## Modify configuration file
In the `.env` configuration file, modify the inference type of the model to start `llama.cpp` inference.
Just modify your config file to use the `llama.cpp.server` provider.
```bash
LLM_MODEL=qwen2.5-0.5b-instruct
LLM_MODEL_PATH=/tmp/qwen2.5-0.5b-instruct-q4_k_m.gguf
MODEL_TYPE=llama_cpp_server
```
## Start the DB-GPT server
```bash
python dbgpt/app/dbgpt_server.py
```toml
# Model Configurations
[models]
[[models.llms]]
name = "qwen2.5-0.5b-instruct-q4_k_m.gguf"
provider = "llama.cpp.server"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
# path = "the-model-path-in-the-local-file-system"
path = "/tmp/qwen2.5-0.5b-instruct-q4_k_m.gguf"
```
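After starting the webserver with this configuration, the model can be queried through the OpenAI-compatible v2 API. A minimal sketch, assuming the webserver runs on the default port with `API_KEYS=dbgpt`, and assuming the request `model` field matches the `name` configured above:
```python
from openai import OpenAI

client = OpenAI(api_key="dbgpt", base_url="http://localhost:5670/api/v2")
response = client.chat.completions.create(
    # Assumption: the model name matches the `name` field in the config above
    model="qwen2.5-0.5b-instruct-q4_k_m.gguf",
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={"chat_mode": "chat_normal"},
    stream=True,
    max_tokens=256,
)
for chunk in response:
    delta_content = chunk.choices[0].delta.content
    if delta_content:
        print(delta_content, end="", flush=True)
```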

View File

@@ -1,7 +1,10 @@
# ProxyLLMs
DB-GPT can be deployed on servers with lower hardware through proxy LLMs, and now dbgpt support many proxy llms, such as OpenAI、Azure、Wenxin、Tongyi、Zhipu and so on.
# Proxy LLMs
### Proxy model
DB-GPT can be deployed on servers with lower hardware requirements through proxy LLMs. DB-GPT supports many proxy LLMs, such as OpenAI, Azure, DeepSeek, Ollama, and more.
## Installation and Configuration
Installing DB-GPT with proxy LLM support requires the `uv` package manager, which provides a faster and more stable dependency management experience.
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
@@ -9,156 +12,274 @@ import TabItem from '@theme/TabItem';
<Tabs
defaultValue="openai"
values={[
{label: 'Open AI', value: 'openai'},
{label: 'Azure', value: 'Azure'},
{label: 'OpenAI', value: 'openai'},
{label: 'Azure', value: 'azure'},
{label: 'DeepSeek', value: 'deepseek'},
{label: 'Ollama', value: 'ollama'},
{label: 'Qwen', value: 'qwen'},
{label: 'ChatGLM', value: 'chatglm'},
{label: 'WenXin', value: 'erniebot'},
]}>
<TabItem value="openai" label="open ai">
Install dependencies
<TabItem value="openai" label="OpenAI">
```python
pip install -e ".[openai]"
### Install Dependencies
```bash
# Use uv to install dependencies needed for OpenAI proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_openai" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Download embedding model
### Configure OpenAI
```python
cd DB-GPT
mkdir models and cd models
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
Edit the `configs/dbgpt-proxy-openai.toml` configuration file to specify your OpenAI API key:
```toml
# Model Configurations
[models]
[[models.llms]]
name = "gpt-3.5-turbo"
provider = "proxy/openai"
api_key = "your-openai-api-key"
# Optional: To use GPT-4, change the name to "gpt-4" or "gpt-4-turbo"
[[models.embeddings]]
name = "text-embedding-ada-002"
provider = "proxy/openai"
api_key = "your-openai-api-key"
```
Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env`file
### Run Webserver
```python
# .env
LLM_MODEL=chatgpt_proxyllm
PROXY_API_KEY={your-openai-sk}
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
# If you use gpt-4
# PROXYLLM_BACKEND=gpt-4
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-openai.toml
```
</TabItem>
<TabItem value="azure" label="Azure">
<TabItem value="Azure" label="Azure">
Install dependencies
### Install Dependencies
```python
pip install -e ".[openai]"
```bash
# Use uv to install dependencies needed for Azure OpenAI proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_openai" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Download embedding model
### Configure Azure OpenAI
```python
cd DB-GPT
mkdir models and cd models
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese # change this to other embedding model if needed.
Edit the `configs/dbgpt-proxy-azure.toml` configuration file to specify your Azure OpenAI settings:
```toml
# Model Configurations
[models]
[[models.llms]]
name = "gpt-35-turbo" # or your deployment model name
provider = "proxy/openai"
api_base = "https://your-resource-name.openai.azure.com/"
api_key = "your-azure-openai-api-key"
api_version = "2023-05-15" # or your specific API version
api_type = "azure"
```
Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env`file
### Run Webserver
```python
# .env
LLM_MODEL=proxyllm
PROXY_API_KEY=xxxx
PROXY_API_BASE=https://xxxxxx.openai.azure.com/
PROXY_API_TYPE=azure
PROXY_SERVER_URL=xxxx
PROXY_API_VERSION=2023-05-15
PROXYLLM_BACKEND=gpt-35-turbo
API_AZURE_DEPLOYMENT=xxxx[deployment_name]
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-azure.toml
```
</TabItem>
<TabItem value="deepseek" label="DeepSeek">
<TabItem value="qwen" label="通义千问">
Install dependencies
### Install Dependencies
```python
pip install dashscope
```bash
# Use uv to install dependencies needed for DeepSeek proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_openai" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Download embedding model
### Configure DeepSeek
```python
cd DB-GPT
mkdir models and cd models
Edit the `configs/dbgpt-proxy-deepseek.toml` configuration file to specify your DeepSeek API key:
# embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
```toml
# Model Configurations
[models]
[[models.llms]]
# name = "deepseek-chat"
name = "deepseek-reasoner"
provider = "proxy/deepseek"
api_key = "your-deepseek-api-key"
```
Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env`file
### Run Webserver
```python
# .env
# Aliyun tongyiqianwen
LLM_MODEL=tongyi_proxyllm
TONGYI_PROXY_API_KEY={your-tongyi-sk}
PROXY_SERVER_URL={your_service_url}
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-deepseek.toml
```
</TabItem>
<TabItem value="chatglm" label="chatglm" >
Install dependencies
<TabItem value="ollama" label="Ollama">
```python
pip install zhipuai
### Install Dependencies
```bash
# Use uv to install dependencies needed for Ollama proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_ollama" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Download embedding model
### Configure Ollama
```python
cd DB-GPT
mkdir models and cd models
Edit the `configs/dbgpt-proxy-ollama.toml` configuration file to specify your Ollama API base:
# embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
```toml
# Model Configurations
[models]
[[models.llms]]
name = "llama3" # or any other model available in your Ollama instance
provider = "proxy/ollama"
api_base = "http://localhost:11434" # your-ollama-api-base
[[models.embeddings]]
name = "nomic-embed-text" # or any other embedding model in Ollama
provider = "proxy/ollama"
api_base = "http://localhost:11434" # your-ollama-api-base
```
Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env`file
### Run Webserver
```python
# .env
LLM_MODEL=zhipu_proxyllm
PROXY_SERVER_URL={your_service_url}
ZHIPU_MODEL_VERSION={version}
ZHIPU_PROXY_API_KEY={your-zhipu-sk}
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-ollama.toml
```
</TabItem>
<TabItem value="qwen" label="Qwen (Tongyi)">
<TabItem value="erniebot" label="文心一言" default>
### Install Dependencies
Download embedding model
```python
cd DB-GPT
mkdir models and cd models
# embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
```bash
# Use uv to install dependencies needed for Aliyun Qwen (Tongyi) proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_tongyi" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Configure the proxy and modify LLM_MODEL, MODEL_VERSION, API_KEY and API_SECRET in the `.env`file
### Configure Qwen
```python
# .env
LLM_MODEL=wenxin_proxyllm
WEN_XIN_MODEL_VERSION={version} # ERNIE-Bot or ERNIE-Bot-turbo
WEN_XIN_API_KEY={your-wenxin-sk}
WEN_XIN_API_SECRET={your-wenxin-sct}
Create or edit a configuration file (e.g., `configs/dbgpt-proxy-tongyi.toml`):
```toml
# Model Configurations
[models]
[[models.llms]]
name = "qwen-turbo" # or qwen-max, qwen-plus
provider = "proxy/tongyi"
api_key = "your-tongyi-api-key"
```
### Run Webserver
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-tongyi.toml
```
</TabItem>
<TabItem value="chatglm" label="ChatGLM (Zhipu)">
### Install Dependencies
```bash
# Use uv to install dependencies needed for Zhipu (ChatGLM) proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_zhipu" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
### Configure ChatGLM
Create or edit a configuration file (e.g., `configs/dbgpt-proxy-zhipu.toml`):
```toml
# Model Configurations
[models]
[[models.llms]]
name = "glm-4" # or other available model versions
provider = "proxy/zhipu"
api_key = "your-zhipu-api-key"
```
### Run Webserver
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-zhipu.toml
```
</TabItem>
<TabItem value="erniebot" label="WenXin (Ernie)">
### Install Dependencies
```bash
# Use uv to install dependencies needed for Baidu WenXin proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_openai" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
### Configure WenXin
Create or edit a configuration file (e.g., `configs/dbgpt-proxy-wenxin.toml`):
```toml
# Model Configurations
[models]
[[models.llms]]
name = "ERNIE-Bot-4.0" # or ernie-bot, ernie-bot-turbo
provider = "proxy/wenxin"
api_key = "your-wenxin-api-key"
api_secret = "your-wenxin-api-secret"
```
### Run Webserver
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-wenxin.toml
```
</TabItem>
</Tabs>
:::info note
If you are in the China region, you can add `--index-url=https://pypi.tuna.tsinghua.edu.cn/simple` at the end of the `uv sync` command for faster package downloads.
:::
⚠️ Be careful not to overwrite the contents of the `.env` configuration file
:::
## Visit Website
After starting the webserver, open your browser and visit [`http://localhost:5670`](http://localhost:5670)
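You can also verify the deployment programmatically through the OpenAI-compatible v2 API. A minimal sketch, assuming `API_KEYS=dbgpt` is configured and using the model name from your TOML configuration (e.g. `gpt-3.5-turbo` from the OpenAI tab; substitute your own):
```python
from openai import OpenAI

# Assumes the webserver runs on the default port with API_KEYS=dbgpt
client = OpenAI(api_key="dbgpt", base_url="http://localhost:5670/api/v2")
response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # the model name configured in your TOML file
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={"chat_mode": "chat_normal"},
)
print(response.choices[0].message.content)
```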

View File

@@ -38,9 +38,27 @@ Chat
curl http://127.0.0.1:8100/api/v1/chat/completions \
-H "Authorization: Bearer EMPTY" \
-H "Content-Type: application/json" \
-d '{"model": "glm-4-9b-chat", "messages": [{"role": "user", "content": "hello"}]}'
-d '{
"model": "Qwen/Qwen2.5-Coder-32B-Instruct",
"messages": [{"role": "user", "content": "hello"}]
}'
```
:::tip
Stream Chat
:::
```bash
curl http://127.0.0.1:8100/api/v1/chat/completions \
-H "Authorization: Bearer EMPTY" \
-H "Content-Type: application/json" \
-d '{
"model": "Qwen/Qwen2.5-Coder-32B-Instruct",
"stream": true,
"messages": [{"role": "user", "content": "hello"}]
}'
```
:::tip
Embedding
:::
@@ -49,7 +67,7 @@ curl http://127.0.0.1:8100/api/v1/embeddings \
-H "Authorization: Bearer EMPTY" \
-H "Content-Type: application/json" \
-d '{
"model": "text2vec",
"model": "BAAI/bge-large-zh-v1.5",
"input": "Hello world!"
}'
```
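The same embeddings endpoint can be called from Python with the OpenAI SDK. A minimal sketch, assuming the same worker address and embedding model as the curl example above:
```python
import openai

client = openai.OpenAI(
    api_key="EMPTY",
    base_url="http://127.0.0.1:8100/api/v1",
)
embedding = client.embeddings.create(
    model="BAAI/bge-large-zh-v1.5",
    input="Hello world!",
)
print(embedding.data[0].embedding[:8])  # first few dimensions of the vector
```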
@@ -59,11 +77,13 @@ curl http://127.0.0.1:8100/api/v1/embeddings \
```python
import openai
openai.api_key = "EMPTY"
openai.api_base = "http://127.0.0.1:8100/api/v1"
model = "glm-4-9b-chat"
model = "Qwen/Qwen2.5-Coder-32B-Instruct"
completion = openai.ChatCompletion.create(
client = openai.OpenAI(
api_key="EMPTY",
base_url="http://127.0.0.1:8100/api/v1",
)
completion = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "hello"}]
)

View File

@@ -20,22 +20,31 @@ ollama pull nomic-embed-text
3. Install the dependencies for the Ollama proxy.
```bash
pip install ollama
# Use uv to install dependencies needed for Ollama proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_ollama" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
### Use ollama proxy model in DB-GPT `.env` file
### Configure the model
```bash
LLM_MODEL=ollama_proxyllm
PROXY_SERVER_URL=http://127.0.0.1:11434
PROXYLLM_BACKEND="qwen:0.5b"
PROXY_API_KEY=not_used
EMBEDDING_MODEL=proxy_ollama
proxy_ollama_proxy_server_url=http://127.0.0.1:11434
proxy_ollama_proxy_backend="nomic-embed-text:latest"
```
Modify your TOML config file to use the `ollama` provider.
### run dbgpt server
```bash
python dbgpt/app/dbgpt_server.py
```toml
# Model Configurations
[models]
[[models.llms]]
name = "qwen:0.5b"
provider = "proxy/ollama"
api_base = "http://localhost:11434"
api_key = ""
[[models.embeddings]]
name = "bge-m3:latest"
provider = "proxy/ollama"
api_url = "http://localhost:11434"
api_key = ""
```
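Before starting DB-GPT, you can check that the Ollama server is reachable and that the configured models have been pulled. A minimal sketch using Ollama's standard `/api/tags` endpoint (assuming Ollama is listening on the `api_base` configured above):
```python
import requests

# Assumes Ollama is listening on the api_base configured above
resp = requests.get("http://localhost:11434/api/tags", timeout=5)
resp.raise_for_status()
models = [m["name"] for m in resp.json().get("models", [])]
print(models)  # e.g. ['qwen:0.5b', 'bge-m3:latest'] if both have been pulled
```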

View File

@@ -2,19 +2,35 @@
DB-GPT supports [vLLM](https://github.com/vllm-project/vllm) inference, a fast and easy-to-use LLM inference and service library.
## Install dependencies
`vLLM` is an optional dependency in DB-GPT. You can install it manually through the following command.
`vLLM` is an optional dependency in DB-GPT. You can install it by adding the `--extra "vllm"` flag when installing dependencies.
```bash
pip install -e ".[vllm]"
# Use uv to install dependencies needed for vllm
# Install core dependencies and select desired extensions
uv sync --all-packages \
--extra "base" \
--extra "hf" \
--extra "cuda121" \
--extra "vllm" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "quant_bnb" \
--extra "dbgpts"
```
## Modify configuration file
In the `.env` configuration file, modify the inference type of the model to start `vllm` inference.
```bash
LLM_MODEL=glm-4-9b-chat
MODEL_TYPE=vllm
# modify the following configuration if you possess GPU resources
# gpu_memory_utilization=0.8
After installing the dependencies, you can modify your configuration file to use the `vllm` provider.
```toml
# Model Configurations
[models]
[[models.llms]]
name = "THUDM/glm-4-9b-chat-hf"
provider = "vllm"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# path = "the-model-path-in-the-local-file-system"
```
For more information about the list of models supported by `vLLM`, please refer to the [vLLM supported model document](https://docs.vllm.ai/en/latest/models/supported_models.html#supported-models).

View File

@@ -85,6 +85,12 @@ uv sync --all-packages \
--extra "dbgpts" \
--index-url=https://pypi.tuna.tsinghua.edu.cn/simple
```
We also recommend configuring your PyPI index via the environment variable `UV_INDEX_URL`, for example:
```bash
echo "export UV_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple" >> ~/.bashrc
```
This tutorial assumes that you can establish network communication with the dependency download sources.
:::