fix(model): Fix apiserver error (#2605)

This commit is contained in:
Fangyin Cheng
2025-04-10 10:23:49 +08:00
committed by GitHub
parent babc484261
commit 5ddd9e5bf8
39 changed files with 1300 additions and 243 deletions

View File

@@ -22,6 +22,7 @@ import TabItem from '@theme/TabItem';
values={[
{label: 'Curl', value: 'curl'},
{label: 'Python', value: 'python'},
{label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
]
}>
@@ -54,8 +55,40 @@ async for data in client.chat_stream(
print(data)
```
</TabItem>
<TabItem value="openai-sdk">
```python
from openai import OpenAI
DBGPT_API_KEY = "dbgpt"
client = OpenAI(
api_key=DBGPT_API_KEY,
base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": "Hello",
},
],
extra_body={
"chat_mode": "chat_normal",
},
stream=True,
max_tokens=2048,
)
for chunk in response:
delta_content = chunk.choices[0].delta.content
print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>
### Chat Completion Stream Response
```commandline
data: {"id": "chatcmpl-ba6fb52e-e5b2-11ee-b031-acde48001122", "model": "gpt-4o", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Hello"}}]}
@@ -110,6 +143,8 @@ from dbgpt_client import Client
DBGPT_API_KEY = "dbgpt"
client = Client(api_key=DBGPT_API_KEY)
response = await client.chat(model="gpt-4o", messages="hello")
print(response)
await client.aclose()
```
</TabItem>
</Tabs>

View File

@@ -21,6 +21,7 @@ import TabItem from '@theme/TabItem';
values={[
{label: 'Curl', value: 'curl'},
{label: 'Python', value: 'python'},
{label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
]
}>
@@ -56,6 +57,40 @@ res = client.chat(
)
```
</TabItem>
<TabItem value="openai-sdk">
```python
from openai import OpenAI
DBGPT_API_KEY = "dbgpt"
DB_NAME="{your_db_name}"
client = OpenAI(
api_key=DBGPT_API_KEY,
base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": "Hello",
},
],
extra_body={
"chat_mode": "chat_data",
"chat_param": DB_NAME,
},
stream=True,
max_tokens=2048,
)
for chunk in response:
delta_content = chunk.choices[0].delta.content
print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>
#### Chat Completion Response

View File

@@ -21,6 +21,7 @@ import TabItem from '@theme/TabItem';
values={[
{label: 'Curl', value: 'curl'},
{label: 'Python', value: 'python'},
{label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
]
}>
@@ -34,7 +35,7 @@ curl -X POST "http://localhost:5670/api/v2/chat/completions" \
-H "Authorization: Bearer $DBGPT_API_KEY" \
-H "accept: application/json" \
-H "Content-Type: application/json" \
-d "{\"messages\":\"Hello\",\"model\":\"chatgpt_proxyllm\", \"chat_mode\": \"chat_flow\", \"chat_param\": \"$FLOW_ID\"}"
-d "{\"messages\":\"Hello\",\"model\":\"gpt-4o\", \"chat_mode\": \"chat_flow\", \"chat_param\": \"$FLOW_ID\"}"
```
</TabItem>
@@ -50,18 +51,53 @@ FLOW_ID="{YOUR_FLOW_ID}"
client = Client(api_key=DBGPT_API_KEY)
async for data in client.chat_stream(
messages="Introduce AWEL",
model="chatgpt_proxyllm",
model="gpt-4o",
chat_mode="chat_flow",
chat_param=FLOW_ID
):
print(data)
```
</TabItem>
<TabItem value="openai-sdk">
```python
from openai import OpenAI
DBGPT_API_KEY = "dbgpt"
FLOW_ID="{YOUR_FLOW_ID}"
client = OpenAI(
api_key=DBGPT_API_KEY,
base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": "Hello",
},
],
extra_body={
"chat_mode": "chat_flow",
"chat_param": FLOW_ID,
},
stream=True,
max_tokens=2048,
)
for chunk in response:
delta_content = chunk.choices[0].delta.content
print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>
#### Chat Completion Stream Response
```commandline
data: {"id": "579f8862-fc4b-481e-af02-a127e6d036c8", "created": 1710918094, "model": "chatgpt_proxyllm", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "\n\n"}}]}
data: {"id": "579f8862-fc4b-481e-af02-a127e6d036c8", "created": 1710918094, "model": "gpt-4o", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "\n\n"}}]}
```
### Create Flow

View File

@@ -2,7 +2,7 @@
This is the introduction to the DB-GPT API documentation. You can interact with the API through HTTP requests from any language, or via our official Python client bindings.
# Authentication
## Authentication
The DB-GPT API uses API keys for authentication. Visit your API Keys page to retrieve the API key you'll use in your requests.
Production requests must be routed through your own backend server where your API key can be securely loaded from an environment variable or key management service.
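For example, a request can read the key from an environment variable and pass it in the `Authorization: Bearer` header. A minimal sketch, assuming the default local deployment at `http://localhost:5670`, the example `dbgpt` key, and a non-streaming response (the payload format follows the curl examples in the chat documents):
```python
import os

import requests

# Load the API key from the environment instead of hard-coding it
api_key = os.environ.get("DBGPT_API_KEY", "dbgpt")

response = requests.post(
    "http://localhost:5670/api/v2/chat/completions",
    headers={
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    },
    # Payload format follows the curl examples in the chat documents
    json={"model": "gpt-4o", "messages": "Hello", "chat_mode": "chat_normal"},
)
print(response.text)
```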
@@ -34,10 +34,18 @@ API_KEYS - The list of API keys that are allowed to access the API. Each of the
API_KEYS=dbgpt
```
## Installation
## Using the DB-GPT official Python Client
If you use Python, you should install the official DB-GPT Client package from PyPI:
```bash
pip install "dbgpt[client]>=0.5.2"
pip install "dbgpt-client>=0.7.1rc0"
```
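A minimal usage sketch (assuming a local DB-GPT server and the example `dbgpt` API key; the client is asynchronous, so it needs an event loop):
```python
import asyncio

from dbgpt_client import Client


async def main():
    # Assumes the default local deployment and the example "dbgpt" API key
    client = Client(api_key="dbgpt")
    response = await client.chat(model="gpt-4o", messages="hello")
    print(response)
    await client.aclose()


asyncio.run(main())
```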
## Using the OpenAI Python SDK
For some chat scenarios, you can use the OpenAI Python SDK to interact with the DB-GPT API, since the DB-GPT API is compatible with the OpenAI API.
```bash
pip install openai
```
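A minimal client setup sketch (assuming the default local deployment; the chat documents show complete streaming examples):
```python
from openai import OpenAI

# Point the OpenAI client at the DB-GPT OpenAI-compatible v2 API
client = OpenAI(api_key="dbgpt", base_url="http://localhost:5670/api/v2")
```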

View File

@@ -21,6 +21,7 @@ import TabItem from '@theme/TabItem';
values={[
{label: 'Curl', value: 'curl'},
{label: 'Python', value: 'python'},
{label: 'Python(OpenAI SDK)', value: 'openai-sdk'},
]
}>
@@ -57,6 +58,41 @@ async for data in client.chat_stream(
print(data)
```
</TabItem>
<TabItem value="openai-sdk">
```python
from openai import OpenAI
DBGPT_API_KEY = "dbgpt"
SPACE_NAME="{YOUR_SPACE_NAME}"
client = OpenAI(
api_key=DBGPT_API_KEY,
base_url="http://localhost:5670/api/v2"
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": "Hello",
},
],
extra_body={
"chat_mode": "chat_knowledge",
"chat_param": SPACE_NAME,
},
stream=True,
max_tokens=2048,
)
for chunk in response:
delta_content = chunk.choices[0].delta.content
print(delta_content, end="", flush=True)
```
</TabItem>
</Tabs>
#### Chat Completion Response

View File

@@ -13,11 +13,13 @@ In the DB-GPT project, we defined a service-oriented multi-model management fram
```python
import openai
openai.api_key = "EMPTY"
openai.api_base = "http://127.0.0.1:8100/api/v1"
model = "vicuna-13b-v1.5"
model = "Qwen/QwQ-32B"
completion = openai.ChatCompletion.create(
client = openai.OpenAI(
api_key="EMPTY",
base_url="http://127.0.0.1:8100/api/v1",
)
completion = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "hello"}]
)

View File

@@ -6,13 +6,37 @@ which supports concurrent requests and continuous batching inference.
## Install dependencies
```bash
pip install -e ".[llama_cpp_server]"
```
If you want to accelerate the inference speed, and you have a GPU, you can install the following dependencies:
You can add the extra `--extra "llama_cpp_server"` to install the dependencies needed for the llama-cpp server.
If you have an NVIDIA GPU, you can enable CUDA support by setting the environment variable `CMAKE_ARGS="-DGGML_CUDA=ON"`.
```bash
CMAKE_ARGS="-DGGML_CUDA=ON" pip install -e ".[llama_cpp_server]"
# Use uv to install dependencies needed for llama-cpp
# Install core dependencies and select desired extensions
CMAKE_ARGS="-DGGML_CUDA=ON" uv sync --all-packages \
--extra "base" \
--extra "hf" \
--extra "cuda121" \
--extra "llama_cpp_server" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "quant_bnb" \
--extra "dbgpts"
```
Otherwise, run the following command to install dependencies without CUDA support.
```bash
# Use uv to install dependencies needed for llama-cpp
# Install core dependencies and select desired extensions
uv sync --all-packages \
--extra "base" \
--extra "hf" \
--extra "llama_cpp_server" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "quant_bnb" \
--extra "dbgpts"
```
## Download the model
@@ -25,16 +49,17 @@ wget https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5
## Modify configuration file
In the `.env` configuration file, modify the inference type of the model to start `llama.cpp` inference.
Just modify your config file to use the `llama.cpp.server` provider.
```bash
LLM_MODEL=qwen2.5-0.5b-instruct
LLM_MODEL_PATH=/tmp/qwen2.5-0.5b-instruct-q4_k_m.gguf
MODEL_TYPE=llama_cpp_server
```
## Start the DB-GPT server
```bash
python dbgpt/app/dbgpt_server.py
```toml
# Model Configurations
[models]
[[models.llms]]
name = "qwen2.5-0.5b-instruct-q4_k_m.gguf"
provider = "llama.cpp.server"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
# path = "the-model-path-in-the-local-file-system"
path = "/tmp/qwen2.5-0.5b-instruct-q4_k_m.gguf"
```
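After starting the webserver with this configuration, the model can be queried through the OpenAI-compatible v2 API. A minimal sketch, assuming the webserver runs on the default port with `API_KEYS=dbgpt`, and assuming the request `model` field matches the `name` configured above:
```python
from openai import OpenAI

client = OpenAI(api_key="dbgpt", base_url="http://localhost:5670/api/v2")
response = client.chat.completions.create(
    # Assumption: the model name matches the `name` field in the config above
    model="qwen2.5-0.5b-instruct-q4_k_m.gguf",
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={"chat_mode": "chat_normal"},
    stream=True,
    max_tokens=256,
)
for chunk in response:
    delta_content = chunk.choices[0].delta.content
    if delta_content:
        print(delta_content, end="", flush=True)
```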

View File

@@ -1,7 +1,10 @@
# ProxyLLMs
DB-GPT can be deployed on servers with lower hardware through proxy LLMs, and now dbgpt support many proxy llms, such as OpenAI、Azure、Wenxin、Tongyi、Zhipu and so on.
# Proxy LLMs
### Proxy model
DB-GPT can be deployed on servers with lower hardware requirements through proxy LLMs. DB-GPT supports many proxy LLMs, such as OpenAI, Azure, DeepSeek, Ollama, and more.
## Installation and Configuration
Installing DB-GPT with proxy LLM support requires the `uv` package manager, which provides a faster and more stable dependency management experience.
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
@@ -9,156 +12,274 @@ import TabItem from '@theme/TabItem';
<Tabs
defaultValue="openai"
values={[
{label: 'Open AI', value: 'openai'},
{label: 'Azure', value: 'Azure'},
{label: 'OpenAI', value: 'openai'},
{label: 'Azure', value: 'azure'},
{label: 'DeepSeek', value: 'deepseek'},
{label: 'Ollama', value: 'ollama'},
{label: 'Qwen', value: 'qwen'},
{label: 'ChatGLM', value: 'chatglm'},
{label: 'WenXin', value: 'erniebot'},
]}>
<TabItem value="openai" label="open ai">
Install dependencies
<TabItem value="openai" label="OpenAI">
```python
pip install -e ".[openai]"
### Install Dependencies
```bash
# Use uv to install dependencies needed for OpenAI proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_openai" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Download embedding model
### Configure OpenAI
```python
cd DB-GPT
mkdir models and cd models
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
Edit the `configs/dbgpt-proxy-openai.toml` configuration file to specify your OpenAI API key:
```toml
# Model Configurations
[models]
[[models.llms]]
name = "gpt-3.5-turbo"
provider = "proxy/openai"
api_key = "your-openai-api-key"
# Optional: To use GPT-4, change the name to "gpt-4" or "gpt-4-turbo"
[[models.embeddings]]
name = "text-embedding-ada-002"
provider = "proxy/openai"
api_key = "your-openai-api-key"
```
Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env`file
### Run Webserver
```python
# .env
LLM_MODEL=chatgpt_proxyllm
PROXY_API_KEY={your-openai-sk}
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
# If you use gpt-4
# PROXYLLM_BACKEND=gpt-4
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-openai.toml
```
</TabItem>
<TabItem value="azure" label="Azure">
<TabItem value="Azure" label="Azure">
Install dependencies
### Install Dependencies
```python
pip install -e ".[openai]"
```bash
# Use uv to install dependencies needed for Azure OpenAI proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_openai" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Download embedding model
### Configure Azure OpenAI
```python
cd DB-GPT
mkdir models and cd models
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese # change this to other embedding model if needed.
Edit the `configs/dbgpt-proxy-azure.toml` configuration file to specify your Azure OpenAI settings:
```toml
# Model Configurations
[models]
[[models.llms]]
name = "gpt-35-turbo" # or your deployment model name
provider = "proxy/openai"
api_base = "https://your-resource-name.openai.azure.com/"
api_key = "your-azure-openai-api-key"
api_version = "2023-05-15" # or your specific API version
api_type = "azure"
```
Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env`file
### Run Webserver
```python
# .env
LLM_MODEL=proxyllm
PROXY_API_KEY=xxxx
PROXY_API_BASE=https://xxxxxx.openai.azure.com/
PROXY_API_TYPE=azure
PROXY_SERVER_URL=xxxx
PROXY_API_VERSION=2023-05-15
PROXYLLM_BACKEND=gpt-35-turbo
API_AZURE_DEPLOYMENT=xxxx[deployment_name]
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-azure.toml
```
</TabItem>
<TabItem value="deepseek" label="DeepSeek">
<TabItem value="qwen" label="通义千问">
Install dependencies
### Install Dependencies
```python
pip install dashscope
```bash
# Use uv to install dependencies needed for DeepSeek proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_openai" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Download embedding model
### Configure DeepSeek
```python
cd DB-GPT
mkdir models and cd models
Edit the `configs/dbgpt-proxy-deepseek.toml` configuration file to specify your DeepSeek API key:
# embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
```toml
# Model Configurations
[models]
[[models.llms]]
# name = "deepseek-chat"
name = "deepseek-reasoner"
provider = "proxy/deepseek"
api_key = "your-deepseek-api-key"
```
Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env`file
### Run Webserver
```python
# .env
# Aliyun tongyiqianwen
LLM_MODEL=tongyi_proxyllm
TONGYI_PROXY_API_KEY={your-tongyi-sk}
PROXY_SERVER_URL={your_service_url}
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-deepseek.toml
```
</TabItem>
<TabItem value="chatglm" label="chatglm" >
Install dependencies
<TabItem value="ollama" label="Ollama">
```python
pip install zhipuai
### Install Dependencies
```bash
# Use uv to install dependencies needed for Ollama proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_ollama" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Download embedding model
### Configure Ollama
```python
cd DB-GPT
mkdir models and cd models
Edit the `configs/dbgpt-proxy-ollama.toml` configuration file to specify your Ollama API base:
# embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
```toml
# Model Configurations
[models]
[[models.llms]]
name = "llama3" # or any other model available in your Ollama instance
provider = "proxy/ollama"
api_base = "http://localhost:11434" # your-ollama-api-base
[[models.embeddings]]
name = "nomic-embed-text" # or any other embedding model in Ollama
provider = "proxy/ollama"
api_base = "http://localhost:11434" # your-ollama-api-base
```
Configure the proxy and modify LLM_MODEL, PROXY_API_URL and API_KEY in the `.env`file
### Run Webserver
```python
# .env
LLM_MODEL=zhipu_proxyllm
PROXY_SERVER_URL={your_service_url}
ZHIPU_MODEL_VERSION={version}
ZHIPU_PROXY_API_KEY={your-zhipu-sk}
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-ollama.toml
```
</TabItem>
<TabItem value="qwen" label="Qwen (Tongyi)">
<TabItem value="erniebot" label="文心一言" default>
### Install Dependencies
Download embedding model
```python
cd DB-GPT
mkdir models and cd models
# embedding model
git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
or
git clone https://huggingface.co/moka-ai/m3e-large
```bash
# Use uv to install dependencies needed for Aliyun Qwen (Tongyi) proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_tongyi" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
Configure the proxy and modify LLM_MODEL, MODEL_VERSION, API_KEY and API_SECRET in the `.env`file
### Configure Qwen
```python
# .env
LLM_MODEL=wenxin_proxyllm
WEN_XIN_MODEL_VERSION={version} # ERNIE-Bot or ERNIE-Bot-turbo
WEN_XIN_API_KEY={your-wenxin-sk}
WEN_XIN_API_SECRET={your-wenxin-sct}
Create or edit a configuration file (e.g., `configs/dbgpt-proxy-tongyi.toml`):
```toml
# Model Configurations
[models]
[[models.llms]]
name = "qwen-turbo" # or qwen-max, qwen-plus
provider = "proxy/tongyi"
api_key = "your-tongyi-api-key"
```
### Run Webserver
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-tongyi.toml
```
</TabItem>
<TabItem value="chatglm" label="ChatGLM (Zhipu)">
### Install Dependencies
```bash
# Use uv to install dependencies needed for Zhipu (ChatGLM) proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_zhipu" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
### Configure ChatGLM
Create or edit a configuration file (e.g., `configs/dbgpt-proxy-zhipu.toml`):
```toml
# Model Configurations
[models]
[[models.llms]]
name = "glm-4" # or other available model versions
provider = "proxy/zhipu"
api_key = "your-zhipu-api-key"
```
### Run Webserver
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-zhipu.toml
```
</TabItem>
<TabItem value="erniebot" label="WenXin (Ernie)">
### Install Dependencies
```bash
# Use uv to install dependencies needed for Baidu WenXin proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_openai" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
### Configure WenXin
Create or edit a configuration file (e.g., `configs/dbgpt-proxy-wenxin.toml`):
```toml
# Model Configurations
[models]
[[models.llms]]
name = "ERNIE-Bot-4.0" # or ernie-bot, ernie-bot-turbo
provider = "proxy/wenxin"
api_key = "your-wenxin-api-key"
api_secret = "your-wenxin-api-secret"
```
### Run Webserver
```bash
uv run dbgpt start webserver --config configs/dbgpt-proxy-wenxin.toml
```
</TabItem>
</Tabs>
:::info note
If you are in the China region, you can add `--index-url=https://pypi.tuna.tsinghua.edu.cn/simple` at the end of the `uv sync` command for faster package downloads.
:::
⚠️ Be careful not to overwrite the contents of the `.env` configuration file
:::
## Visit Website
After starting the webserver, open your browser and visit [`http://localhost:5670`](http://localhost:5670)
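You can also verify the deployment programmatically through the OpenAI-compatible v2 API. A minimal sketch, assuming `API_KEYS=dbgpt` is configured and using the model name from your TOML configuration (e.g. `gpt-3.5-turbo` from the OpenAI tab; substitute your own):
```python
from openai import OpenAI

# Assumes the webserver runs on the default port with API_KEYS=dbgpt
client = OpenAI(api_key="dbgpt", base_url="http://localhost:5670/api/v2")
response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # the model name configured in your TOML file
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={"chat_mode": "chat_normal"},
)
print(response.choices[0].message.content)
```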

View File

@@ -38,9 +38,27 @@ Chat
curl http://127.0.0.1:8100/api/v1/chat/completions \
-H "Authorization: Bearer EMPTY" \
-H "Content-Type: application/json" \
-d '{"model": "glm-4-9b-chat", "messages": [{"role": "user", "content": "hello"}]}'
-d '{
"model": "Qwen/Qwen2.5-Coder-32B-Instruct",
"messages": [{"role": "user", "content": "hello"}]
}'
```
:::tip
Stream Chat
:::
```bash
curl http://127.0.0.1:8100/api/v1/chat/completions \
-H "Authorization: Bearer EMPTY" \
-H "Content-Type: application/json" \
-d '{
"model": "Qwen/Qwen2.5-Coder-32B-Instruct",
"stream": true,
"messages": [{"role": "user", "content": "hello"}]
}'
```
:::tip
Embedding
:::
@@ -49,7 +67,7 @@ curl http://127.0.0.1:8100/api/v1/embeddings \
-H "Authorization: Bearer EMPTY" \
-H "Content-Type: application/json" \
-d '{
"model": "text2vec",
"model": "BAAI/bge-large-zh-v1.5",
"input": "Hello world!"
}'
```
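The same embeddings endpoint can be called from Python with the OpenAI SDK. A minimal sketch, assuming the same worker address and embedding model as the curl example above:
```python
import openai

client = openai.OpenAI(
    api_key="EMPTY",
    base_url="http://127.0.0.1:8100/api/v1",
)
embedding = client.embeddings.create(
    model="BAAI/bge-large-zh-v1.5",
    input="Hello world!",
)
print(embedding.data[0].embedding[:8])  # first few dimensions of the vector
```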
@@ -59,11 +77,13 @@ curl http://127.0.0.1:8100/api/v1/embeddings \
```python
import openai
openai.api_key = "EMPTY"
openai.api_base = "http://127.0.0.1:8100/api/v1"
model = "glm-4-9b-chat"
model = "Qwen/Qwen2.5-Coder-32B-Instruct"
completion = openai.ChatCompletion.create(
client = openai.OpenAI(
api_key="EMPTY",
base_url="http://127.0.0.1:8100/api/v1",
)
completion = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "hello"}]
)

View File

@@ -20,22 +20,31 @@ ollama pull nomic-embed-text
3. Install the dependencies for the Ollama proxy.
```bash
pip install ollama
# Use uv to install dependencies needed for Ollama proxy
uv sync --all-packages \
--extra "base" \
--extra "proxy_ollama" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "dbgpts"
```
### Use ollama proxy model in DB-GPT `.env` file
### Configure the model
```bash
LLM_MODEL=ollama_proxyllm
PROXY_SERVER_URL=http://127.0.0.1:11434
PROXYLLM_BACKEND="qwen:0.5b"
PROXY_API_KEY=not_used
EMBEDDING_MODEL=proxy_ollama
proxy_ollama_proxy_server_url=http://127.0.0.1:11434
proxy_ollama_proxy_backend="nomic-embed-text:latest"
```
Modify your TOML config file to use the `ollama` provider.
### run dbgpt server
```bash
python dbgpt/app/dbgpt_server.py
```toml
# Model Configurations
[models]
[[models.llms]]
name = "qwen:0.5b"
provider = "proxy/ollama"
api_base = "http://localhost:11434"
api_key = ""
[[models.embeddings]]
name = "bge-m3:latest"
provider = "proxy/ollama"
api_url = "http://localhost:11434"
api_key = ""
```
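Before starting DB-GPT, you can check that the Ollama server is reachable and that the configured models have been pulled. A minimal sketch using Ollama's standard `/api/tags` endpoint (assuming Ollama is listening on the `api_base` configured above):
```python
import requests

# Assumes Ollama is listening on the api_base configured above
resp = requests.get("http://localhost:11434/api/tags", timeout=5)
resp.raise_for_status()
models = [m["name"] for m in resp.json().get("models", [])]
print(models)  # e.g. ['qwen:0.5b', 'bge-m3:latest'] if both have been pulled
```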

View File

@@ -2,19 +2,35 @@
DB-GPT supports [vLLM](https://github.com/vllm-project/vllm) inference, a fast and easy-to-use LLM inference and service library.
## Install dependencies
`vLLM` is an optional dependency in DB-GPT. You can install it manually through the following command.
`vLLM` is an optional dependency in DB-GPT. You can install it by adding the `--extra "vllm"` flag when installing dependencies.
```bash
pip install -e ".[vllm]"
# Use uv to install dependencies needed for vllm
# Install core dependencies and select desired extensions
uv sync --all-packages \
--extra "base" \
--extra "hf" \
--extra "cuda121" \
--extra "vllm" \
--extra "rag" \
--extra "storage_chromadb" \
--extra "quant_bnb" \
--extra "dbgpts"
```
## Modify configuration file
In the `.env` configuration file, modify the inference type of the model to start `vllm` inference.
```bash
LLM_MODEL=glm-4-9b-chat
MODEL_TYPE=vllm
# modify the following configuration if you possess GPU resources
# gpu_memory_utilization=0.8
After installing the dependencies, you can modify your configuration file to use the `vllm` provider.
```toml
# Model Configurations
[models]
[[models.llms]]
name = "THUDM/glm-4-9b-chat-hf"
provider = "vllm"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# path = "the-model-path-in-the-local-file-system"
```
For more information about the list of models supported by `vLLM`, please refer to the [vLLM supported model document](https://docs.vllm.ai/en/latest/models/supported_models.html#supported-models).

View File

@@ -85,6 +85,12 @@ uv sync --all-packages \
--extra "dbgpts" \
--index-url=https://pypi.tuna.tsinghua.edu.cn/simple
```
We also recommend configuring your PyPI index via the environment variable `UV_INDEX_URL`, for example:
```bash
echo "export UV_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple" >> ~/.bashrc
```
This tutorial assumes that you can establish network communication with the dependency download sources.
:::