fix(model): Fix apiserver error (#2605)
@@ -22,6 +22,7 @@ import TabItem from '@theme/TabItem';
  values={[
    {label: 'Curl', value: 'curl'},
    {label: 'Python', value: 'python'},
    {label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
  ]
}>

@@ -54,8 +55,40 @@ async for data in client.chat_stream(
    print(data)
```
</TabItem>

<TabItem value="openai-sdk">

```python
from openai import OpenAI
DBGPT_API_KEY = "dbgpt"

client = OpenAI(
    api_key=DBGPT_API_KEY,
    base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": "Hello",
        },
    ],
    extra_body={
        "chat_mode": "chat_normal",
    },
    stream=True,
    max_tokens=2048,
)

for chunk in response:
    delta_content = chunk.choices[0].delta.content
    print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>

### Chat Completion Stream Response
```commandline
data: {"id": "chatcmpl-ba6fb52e-e5b2-11ee-b031-acde48001122", "model": "gpt-4o", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Hello"}}]}

@@ -110,6 +143,8 @@ from dbgpt_client import Client
DBGPT_API_KEY = "dbgpt"
client = Client(api_key=DBGPT_API_KEY)
response = await client.chat(model="gpt-4o", messages="hello")
print(response)
await client.aclose()
```
</TabItem>
</Tabs>

@@ -21,6 +21,7 @@ import TabItem from '@theme/TabItem';
  values={[
    {label: 'Curl', value: 'curl'},
    {label: 'Python', value: 'python'},
    {label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
  ]
}>

@@ -56,6 +57,40 @@ res = client.chat(
)
```
</TabItem>

<TabItem value="openai-sdk">

```python
from openai import OpenAI

DBGPT_API_KEY = "dbgpt"
DB_NAME="{your_db_name}"

client = OpenAI(
    api_key=DBGPT_API_KEY,
    base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": "Hello",
        },
    ],
    extra_body={
        "chat_mode": "chat_data",
        "chat_param": DB_NAME,
    },
    stream=True,
    max_tokens=2048,
)

for chunk in response:
    delta_content = chunk.choices[0].delta.content
    print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>

#### Chat Completion Response

@@ -21,6 +21,7 @@ import TabItem from '@theme/TabItem';
  values={[
    {label: 'Curl', value: 'curl'},
    {label: 'Python', value: 'python'},
    {label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
  ]
}>

@@ -34,7 +35,7 @@ curl -X POST "http://localhost:5670/api/v2/chat/completions" \
  -H "Authorization: Bearer $DBGPT_API_KEY" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d "{\"messages\":\"Hello\",\"model\":\"chatgpt_proxyllm\", \"chat_mode\": \"chat_flow\", \"chat_param\": \"$FLOW_ID\"}"
  -d "{\"messages\":\"Hello\",\"model\":\"gpt-4o\", \"chat_mode\": \"chat_flow\", \"chat_param\": \"$FLOW_ID\"}"

```
</TabItem>
@@ -50,18 +51,53 @@ FLOW_ID="{YOUR_FLOW_ID}"
client = Client(api_key=DBGPT_API_KEY)
async for data in client.chat_stream(
    messages="Introduce AWEL",
    model="chatgpt_proxyllm",
    model="gpt-4o",
    chat_mode="chat_flow",
    chat_param=FLOW_ID
):
    print(data)
```
</TabItem>

<TabItem value="openai-sdk">

```python
from openai import OpenAI

DBGPT_API_KEY = "dbgpt"
FLOW_ID="{YOUR_FLOW_ID}"

client = OpenAI(
    api_key=DBGPT_API_KEY,
    base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": "Hello",
        },
    ],
    extra_body={
        "chat_mode": "chat_flow",
        "chat_param": FLOW_ID,
    },
    stream=True,
    max_tokens=2048,
)

for chunk in response:
    delta_content = chunk.choices[0].delta.content
    print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>

#### Chat Completion Stream Response
```commandline
data: {"id": "579f8862-fc4b-481e-af02-a127e6d036c8", "created": 1710918094, "model": "chatgpt_proxyllm", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "\n\n"}}]}
data: {"id": "579f8862-fc4b-481e-af02-a127e6d036c8", "created": 1710918094, "model": "gpt-4o", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "\n\n"}}]}
```
### Create Flow

@@ -2,7 +2,7 @@

This is the introduction to the DB-GPT API documentation. You can interact with the API through HTTP requests from any language, via our official Python Client bindings.

# Authentication
## Authentication
The DB-GPT API uses API keys for authentication. Visit your API Keys page to retrieve the API key you'll use in your requests.

Production requests must be routed through your own backend server where your API key can be securely loaded from an environment variable or key management service.
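For example, a backend service would read the key from the environment rather than hard-coding it. A minimal sketch, assuming the key has been exported as the environment variable `DBGPT_API_KEY`:

```python
import os

from dbgpt_client import Client

# Load the key from the environment instead of committing it to source control
api_key = os.environ["DBGPT_API_KEY"]
client = Client(api_key=api_key)
```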

@@ -34,10 +34,18 @@ API_KEYS - The list of API keys that are allowed to access the API. Each of the
API_KEYS=dbgpt
```

## Installation
## Using the DB-GPT official Python Client

If you use Python, you should install the official DB-GPT Client package from PyPI:

```bash
pip install "dbgpt[client]>=0.5.2"
pip install "dbgpt-client>=0.7.1rc0"
```
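
After installation, you can verify the client against a running DB-GPT server. A minimal sketch based on the examples later in this guide; the API key and model name are placeholders for your own deployment:

```python
import asyncio

from dbgpt_client import Client


async def main():
    client = Client(api_key="dbgpt")
    # `chat` is a coroutine, so it must be awaited
    response = await client.chat(model="gpt-4o", messages="Hello")
    print(response)
    await client.aclose()


asyncio.run(main())
```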

## Using the OpenAI Python SDK

In some chat scenarios, you can use the OpenAI Python SDK to interact with the DB-GPT API, because the DB-GPT API is compatible with the OpenAI API.

```bash
pip install openai
```
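
Because of this compatibility, the SDK only needs the DB-GPT base URL and an API key. A minimal sketch; the host, port, and model name are assumptions for your deployment:

```python
from openai import OpenAI

client = OpenAI(
    api_key="dbgpt",
    base_url="http://localhost:5670/api/v2",
)
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=2048,
)
print(response.choices[0].message.content)
```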
@@ -21,6 +21,7 @@ import TabItem from '@theme/TabItem';
  values={[
    {label: 'Curl', value: 'curl'},
    {label: 'Python', value: 'python'},
    {label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
  ]
}>

@@ -57,6 +58,41 @@ async for data in client.chat_stream(
    print(data)
```
</TabItem>

<TabItem value="openai-sdk">

```python
from openai import OpenAI

DBGPT_API_KEY = "dbgpt"
SPACE_NAME="{YOUR_SPACE_NAME}"

client = OpenAI(
    api_key=DBGPT_API_KEY,
    base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": "Hello",
        },
    ],
    extra_body={
        "chat_mode": "chat_knowledge",
        "chat_param": SPACE_NAME,
    },
    stream=True,
    max_tokens=2048,
)

for chunk in response:
    delta_content = chunk.choices[0].delta.content
    print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>

#### Chat Completion Response

@@ -13,11 +13,13 @@ In the DB-GPT project, we defined a service-oriented multi-model management fram

```python
import openai
openai.api_key = "EMPTY"
openai.api_base = "http://127.0.0.1:8100/api/v1"
model = "vicuna-13b-v1.5"
model = "Qwen/QwQ-32B"

completion = openai.ChatCompletion.create(
client = openai.OpenAI(
    api_key="EMPTY",
    base_url="http://127.0.0.1:8100/api/v1",
)
completion = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "hello"}]
)

@@ -6,13 +6,37 @@ which supports concurrent requests and continuous batching inference.

## Install dependencies

```bash
pip install -e ".[llama_cpp_server]"
```
If you want to accelerate the inference speed, and you have a GPU, you can install the following dependencies:
You can add the extra `--extra "llama_cpp_server"` to install the dependencies needed for the llama-cpp server.

If you have an NVIDIA GPU, you can enable CUDA support by setting the environment variable `CMAKE_ARGS="-DGGML_CUDA=ON"`.

```bash
CMAKE_ARGS="-DGGML_CUDA=ON" pip install -e ".[llama_cpp_server]"
# Use uv to install dependencies needed for llama-cpp
# Install core dependencies and select desired extensions
CMAKE_ARGS="-DGGML_CUDA=ON" uv sync --all-packages \
  --extra "base" \
  --extra "hf" \
  --extra "cuda121" \
  --extra "llama_cpp_server" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "quant_bnb" \
  --extra "dbgpts"
```

Otherwise, run the following command to install dependencies without CUDA support.

```bash
# Use uv to install dependencies needed for llama-cpp
# Install core dependencies and select desired extensions
uv sync --all-packages \
  --extra "base" \
  --extra "hf" \
  --extra "llama_cpp_server" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "quant_bnb" \
  --extra "dbgpts"
```

## Download the model

@@ -25,16 +49,17 @@ wget https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5

## Modify configuration file

In the `.env` configuration file, modify the inference type of the model to start `llama.cpp` inference.
Just modify your config file to use the `llama.cpp.server` provider.

```bash
LLM_MODEL=qwen2.5-0.5b-instruct
LLM_MODEL_PATH=/tmp/qwen2.5-0.5b-instruct-q4_k_m.gguf
MODEL_TYPE=llama_cpp_server
```

## Start the DB-GPT server

```bash
python dbgpt/app/dbgpt_server.py
```toml
# Model Configurations
[models]
[[models.llms]]
name = "qwen2.5-0.5b-instruct-q4_k_m.gguf"
provider = "llama.cpp.server"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
# path = "the-model-path-in-the-local-file-system"
path = "/tmp/qwen2.5-0.5b-instruct-q4_k_m.gguf"
```
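
Then start the webserver with the config file that contains this block. A sketch; the path `configs/dbgpt-local-llama-cpp-server.toml` is an assumed location for your config file:

```bash
# Start DB-GPT with the llama.cpp server configuration
uv run dbgpt start webserver --config configs/dbgpt-local-llama-cpp-server.toml
```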
@@ -1,7 +1,10 @@
# ProxyLLMs
DB-GPT can be deployed on servers with lower hardware through proxy LLMs, and now dbgpt support many proxy llms, such as OpenAI、Azure、Wenxin、Tongyi、Zhipu and so on.
# Proxy LLMs

### Proxy model
DB-GPT can be deployed on servers with lower hardware requirements through proxy LLMs. DB-GPT supports many proxy LLMs, such as OpenAI, Azure, DeepSeek, Ollama, and more.

## Installation and Configuration

Installing DB-GPT with proxy LLM support requires using the `uv` package manager for a faster and more stable dependency management experience.

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
@@ -9,156 +12,274 @@ import TabItem from '@theme/TabItem';
<Tabs
  defaultValue="openai"
  values={[
    {label: 'Open AI', value: 'openai'},
    {label: 'Azure', value: 'Azure'},
    {label: 'OpenAI', value: 'openai'},
    {label: 'Azure', value: 'azure'},
    {label: 'DeepSeek', value: 'deepseek'},
    {label: 'Ollama', value: 'ollama'},
    {label: 'Qwen', value: 'qwen'},
    {label: 'ChatGLM', value: 'chatglm'},
    {label: 'WenXin', value: 'erniebot'},
  ]}>
<TabItem value="openai" label="open ai">
Install dependencies
<TabItem value="openai" label="OpenAI">

```python
pip install -e ".[openai]"
### Install Dependencies

```bash
# Use uv to install dependencies needed for OpenAI proxy
uv sync --all-packages \
  --extra "base" \
  --extra "proxy_openai" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "dbgpts"
```

Download embedding model
### Configure OpenAI

```python
cd DB-GPT
mkdir models and cd models
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
Edit the `configs/dbgpt-proxy-openai.toml` configuration file to specify your OpenAI API key:

```toml
# Model Configurations
[models]
[[models.llms]]
name = "gpt-3.5-turbo"
provider = "proxy/openai"
api_key = "your-openai-api-key"
# Optional: To use GPT-4, change the name to "gpt-4" or "gpt-4-turbo"

[[models.embeddings]]
name = "text-embedding-ada-002"
provider = "proxy/openai"
api_key = "your-openai-api-key"
```

Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env` file
### Run Webserver

```python
# .env
LLM_MODEL=chatgpt_proxyllm
PROXY_API_KEY={your-openai-sk}
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
# If you use gpt-4
# PROXYLLM_BACKEND=gpt-4
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-openai.toml
```

</TabItem>

<TabItem value="azure" label="Azure">

<TabItem value="Azure" label="Azure">
Install dependencies
### Install Dependencies

```python
pip install -e ".[openai]"
```bash
# Use uv to install dependencies needed for Azure OpenAI proxy
uv sync --all-packages \
  --extra "base" \
  --extra "proxy_openai" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "dbgpts"
```

Download embedding model
### Configure Azure OpenAI

```python
cd DB-GPT
mkdir models and cd models
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese # change this to other embedding model if needed.
Edit the `configs/dbgpt-proxy-azure.toml` configuration file to specify your Azure OpenAI settings:

```toml
# Model Configurations
[models]
[[models.llms]]
name = "gpt-35-turbo" # or your deployment model name
provider = "proxy/openai"
api_base = "https://your-resource-name.openai.azure.com/"
api_key = "your-azure-openai-api-key"
api_version = "2023-05-15" # or your specific API version
api_type = "azure"
```

Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env` file
### Run Webserver

```python
# .env
LLM_MODEL=proxyllm
PROXY_API_KEY=xxxx
PROXY_API_BASE=https://xxxxxx.openai.azure.com/
PROXY_API_TYPE=azure
PROXY_SERVER_URL=xxxx
PROXY_API_VERSION=2023-05-15
PROXYLLM_BACKEND=gpt-35-turbo
API_AZURE_DEPLOYMENT=xxxx[deployment_name]
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-azure.toml
```

</TabItem>

<TabItem value="deepseek" label="DeepSeek">

<TabItem value="qwen" label="通义千问">
Install dependencies
### Install Dependencies

```python
pip install dashscope
```bash
# Use uv to install dependencies needed for DeepSeek proxy
uv sync --all-packages \
  --extra "base" \
  --extra "proxy_openai" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "dbgpts"
```

Download embedding model
### Configure DeepSeek

```python
cd DB-GPT
mkdir models and cd models
Edit the `configs/dbgpt-proxy-deepseek.toml` configuration file to specify your DeepSeek API key:

# embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
```toml
# Model Configurations
[models]
[[models.llms]]
# name = "deepseek-chat"
name = "deepseek-reasoner"
provider = "proxy/deepseek"
api_key = "your-deepseek-api-key"
```

Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env` file
### Run Webserver

```python
# .env
# Aliyun tongyiqianwen
LLM_MODEL=tongyi_proxyllm
TONGYI_PROXY_API_KEY={your-tongyi-sk}
PROXY_SERVER_URL={your_service_url}
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-deepseek.toml
```

</TabItem>

<TabItem value="chatglm" label="chatglm" >
Install dependencies
<TabItem value="ollama" label="Ollama">

```python
pip install zhipuai
### Install Dependencies

```bash
# Use uv to install dependencies needed for Ollama proxy
uv sync --all-packages \
  --extra "base" \
  --extra "proxy_ollama" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "dbgpts"
```

Download embedding model
### Configure Ollama

```python
cd DB-GPT
mkdir models and cd models
Edit the `configs/dbgpt-proxy-ollama.toml` configuration file to specify your Ollama API base:

# embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
```toml
# Model Configurations
[models]
[[models.llms]]
name = "llama3" # or any other model available in your Ollama instance
provider = "proxy/ollama"
api_base = "http://localhost:11434" # your-ollama-api-base

[[models.embeddings]]
name = "nomic-embed-text" # or any other embedding model in Ollama
provider = "proxy/ollama"
api_base = "http://localhost:11434" # your-ollama-api-base
```

Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env` file
### Run Webserver

```python
# .env
LLM_MODEL=zhipu_proxyllm
PROXY_SERVER_URL={your_service_url}
ZHIPU_MODEL_VERSION={version}
ZHIPU_PROXY_API_KEY={your-zhipu-sk}
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-ollama.toml
```

</TabItem>

<TabItem value="qwen" label="Qwen (Tongyi)">

<TabItem value="erniebot" label="文心一言" default>
### Install Dependencies

Download embedding model

```python
cd DB-GPT
mkdir models and cd models

# embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
```bash
# Use uv to install dependencies needed for Aliyun Qwen (Tongyi) proxy
uv sync --all-packages \
  --extra "base" \
  --extra "proxy_tongyi" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "dbgpts"
```

Configure the proxy and modify LLM_MODEL, MODEL_VERSION, API_KEY and API_SECRET in the `.env` file
### Configure Qwen

```python
# .env
LLM_MODEL=wenxin_proxyllm
WEN_XIN_MODEL_VERSION={version} # ERNIE-Bot or ERNIE-Bot-turbo
WEN_XIN_API_KEY={your-wenxin-sk}
WEN_XIN_API_SECRET={your-wenxin-sct}
Create or edit a configuration file (e.g., `configs/dbgpt-proxy-tongyi.toml`):

```toml
# Model Configurations
[models]
[[models.llms]]
name = "qwen-turbo" # or qwen-max, qwen-plus
provider = "proxy/tongyi"
api_key = "your-tongyi-api-key"
```

### Run Webserver

```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-tongyi.toml
```

</TabItem>

<TabItem value="chatglm" label="ChatGLM (Zhipu)">

### Install Dependencies

```bash
# Use uv to install dependencies needed for Zhipu (ChatGLM) proxy
uv sync --all-packages \
  --extra "base" \
  --extra "proxy_zhipu" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "dbgpts"
```

### Configure ChatGLM

Create or edit a configuration file (e.g., `configs/dbgpt-proxy-zhipu.toml`):

```toml
# Model Configurations
[models]
[[models.llms]]
name = "glm-4" # or other available model versions
provider = "proxy/zhipu"
api_key = "your-zhipu-api-key"
```

### Run Webserver

```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-zhipu.toml
```

</TabItem>

<TabItem value="erniebot" label="WenXin (Ernie)">

### Install Dependencies

```bash
# Use uv to install dependencies needed for Baidu WenXin proxy
uv sync --all-packages \
  --extra "base" \
  --extra "proxy_openai" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "dbgpts"
```

### Configure WenXin

Create or edit a configuration file (e.g., `configs/dbgpt-proxy-wenxin.toml`):

```toml
# Model Configurations
[models]
[[models.llms]]
name = "ERNIE-Bot-4.0" # or ernie-bot, ernie-bot-turbo
provider = "proxy/wenxin"
api_key = "your-wenxin-api-key"
api_secret = "your-wenxin-api-secret"
```

### Run Webserver

```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-wenxin.toml
```

</TabItem>
</Tabs>


:::info note
If you are in the China region, you can add `--index-url=https://pypi.tuna.tsinghua.edu.cn/simple` at the end of the `uv sync` command for faster package downloads.
:::

⚠️ Be careful not to overwrite the contents of the `.env` configuration file
:::

## Visit Website

After starting the webserver, open your browser and visit [`http://localhost:5670`](http://localhost:5670)

@@ -38,9 +38,27 @@ Chat
curl http://127.0.0.1:8100/api/v1/chat/completions \
  -H "Authorization: Bearer EMPTY" \
  -H "Content-Type: application/json" \
  -d '{"model": "glm-4-9b-chat", "messages": [{"role": "user", "content": "hello"}]}'
  -d '{
    "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
    "messages": [{"role": "user", "content": "hello"}]
  }'
```

:::tip
Stream Chat
:::
```bash
curl http://127.0.0.1:8100/api/v1/chat/completions \
  -H "Authorization: Bearer EMPTY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
    "stream": true,
    "messages": [{"role": "user", "content": "hello"}]
  }'
```


:::tip
Embedding
:::
@@ -49,7 +67,7 @@ curl http://127.0.0.1:8100/api/v1/embeddings \
  -H "Authorization: Bearer EMPTY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "text2vec",
    "model": "BAAI/bge-large-zh-v1.5",
    "input": "Hello world!"
  }'
```
@@ -59,11 +77,13 @@ curl http://127.0.0.1:8100/api/v1/embeddings \

```bash
import openai
openai.api_key = "EMPTY"
openai.api_base = "http://127.0.0.1:8100/api/v1"
model = "glm-4-9b-chat"
model = "Qwen/Qwen2.5-Coder-32B-Instruct"

completion = openai.ChatCompletion.create(
client = openai.OpenAI(
    api_key="EMPTY",
    base_url="http://127.0.0.1:8100/api/v1",
)
completion = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "hello"}]
)

@@ -20,22 +20,31 @@ ollama pull nomic-embed-text

3. Install the ollama package.
```bash
pip install ollama
# Use uv to install dependencies needed for Ollama proxy
uv sync --all-packages \
  --extra "base" \
  --extra "proxy_ollama" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "dbgpts"
```

### Use ollama proxy model in DB-GPT `.env` file
### Configure the model

```bash
LLM_MODEL=ollama_proxyllm
PROXY_SERVER_URL=http://127.0.0.1:11434
PROXYLLM_BACKEND="qwen:0.5b"
PROXY_API_KEY=not_used
EMBEDDING_MODEL=proxy_ollama
proxy_ollama_proxy_server_url=http://127.0.0.1:11434
proxy_ollama_proxy_backend="nomic-embed-text:latest"
```
Modify your toml config file to use the `ollama` provider.

### run dbgpt server
```bash
python dbgpt/app/dbgpt_server.py
```toml
# Model Configurations
[models]
[[models.llms]]
name = "qwen:0.5b"
provider = "proxy/ollama"
api_base = "http://localhost:11434"
api_key = ""

[[models.embeddings]]
name = "bge-m3:latest"
provider = "proxy/ollama"
api_url = "http://localhost:11434"
api_key = ""
```
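
After updating the config, start the webserver with it. A sketch; `configs/dbgpt-proxy-ollama.toml` is the config path assumed here, matching the ProxyLLMs guide:

```bash
# Start DB-GPT with the Ollama proxy configuration
uv run dbgpt start webserver --config configs/dbgpt-proxy-ollama.toml
```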
@@ -2,19 +2,35 @@
DB-GPT supports [vLLM](https://github.com/vllm-project/vllm) inference, a fast and easy-to-use LLM inference and service library.

## Install dependencies
`vLLM` is an optional dependency in DB-GPT. You can install it manually through the following command.
`vLLM` is an optional dependency in DB-GPT. You can install it by adding the extra `--extra "vllm"` when installing dependencies.

```bash
pip install -e ".[vllm]"
# Use uv to install dependencies needed for vllm
# Install core dependencies and select desired extensions
uv sync --all-packages \
  --extra "base" \
  --extra "hf" \
  --extra "cuda121" \
  --extra "vllm" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "quant_bnb" \
  --extra "dbgpts"
```

## Modify configuration file
In the `.env` configuration file, modify the inference type of the model to start `vllm` inference.
```bash
LLM_MODEL=glm-4-9b-chat
MODEL_TYPE=vllm
# modify the following configuration if you possess GPU resources
# gpu_memory_utilization=0.8

After installing the dependencies, you can modify your configuration file to use the `vllm` provider.

```toml
# Model Configurations
[models]
[[models.llms]]
name = "THUDM/glm-4-9b-chat-hf"
provider = "vllm"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# path = "the-model-path-in-the-local-file-system"
```
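
Then start the webserver with the config file that contains this block. A sketch; the path `configs/dbgpt-local-vllm.toml` is an assumption for where you saved the configuration above:

```bash
# Start DB-GPT with the vLLM configuration
uv run dbgpt start webserver --config configs/dbgpt-local-vllm.toml
```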

For more information about the list of models supported by `vLLM`, please refer to the [vLLM supported model document](https://docs.vllm.ai/en/latest/models/supported_models.html#supported-models).

@@ -85,6 +85,12 @@ uv sync --all-packages \
  --extra "dbgpts" \
  --index-url=https://pypi.tuna.tsinghua.edu.cn/simple
```
We also recommend setting your PyPI index through the `UV_INDEX_URL` environment variable, for example:
```bash
echo "export UV_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple" >> ~/.bashrc
```

This tutorial assumes that you can establish network communication with the dependency download sources.
:::