From 2abd68d6c0aabc2cbd7d1089815dd268533706d4 Mon Sep 17 00:00:00 2001
From: Fangyin Cheng
Date: Tue, 29 Apr 2025 09:55:28 +0800
Subject: [PATCH] feat(model): Support Qwen3 models (#2664)

---
 README.ja.md                                  |  5 +
 README.md                                     |  5 +
 README.zh.md                                  |  5 +
 configs/dbgpt-local-qwen3.example.toml        | 37 ++++++++
 docs/blog/2025-04-29-db-gpt-qwen3-support.md  | 95 +++++++++++++++++++
 packages/dbgpt-core/pyproject.toml            |  7 ++
 .../src/dbgpt/core/interface/media.py         | 13 ++-
 .../src/dbgpt/core/interface/message.py       | 10 +-
 .../src/dbgpt/model/adapter/base.py           | 22 ++++-
 .../src/dbgpt/model/adapter/hf_adapter.py     | 58 +++++++++++
 .../model/cluster/worker/default_worker.py    | 11 ++-
 11 files changed, 257 insertions(+), 11 deletions(-)
 create mode 100644 configs/dbgpt-local-qwen3.example.toml
 create mode 100644 docs/blog/2025-04-29-db-gpt-qwen3-support.md

diff --git a/README.ja.md b/README.ja.md
index d1f90faf6..031081e76 100644
--- a/README.ja.md
+++ b/README.ja.md
@@ -162,6 +162,11 @@ DB-GPTのアーキテクチャは以下の図に示されています:
 私たちは、LLaMA/LLaMA2、Baichuan、ChatGLM、Wenxin、Tongyi、Zhipuなど、オープンソースおよびAPIエージェントからの数十の大規模言語モデル(LLM)を含む幅広いモデルをサポートしています。

 - ニュース
+  - 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
+  - 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
+  - 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
+  - 🔥🔥🔥 [GLM-Z1-32B-0414](https://huggingface.co/THUDM/GLM-Z1-32B-0414)
+  - 🔥🔥🔥 [GLM-4-32B-0414](https://huggingface.co/THUDM/GLM-4-32B-0414)
   - 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
   - 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
   - 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
diff --git a/README.md b/README.md
index 12539c8d1..eb1766e1a 100644
--- a/README.md
+++ b/README.md
@@ -169,6 +169,11 @@ At present, we have introduced several key features to showcase our current capa
 We offer extensive model support, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and many more.

 - News
+  - 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
+  - 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
+  - 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
+  - 🔥🔥🔥 [GLM-Z1-32B-0414](https://huggingface.co/THUDM/GLM-Z1-32B-0414)
+  - 🔥🔥🔥 [GLM-4-32B-0414](https://huggingface.co/THUDM/GLM-4-32B-0414)
   - 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
   - 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
   - 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
diff --git a/README.zh.md b/README.zh.md
index 1cfc2dae1..cbeab84d9 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -163,6 +163,11 @@
 海量模型支持,包括开源、API代理等几十种大语言模型。如LLaMA/LLaMA2、Baichuan、ChatGLM、文心、通义、智谱等。当前已支持如下模型:

 - 新增支持模型
+  - 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
+  - 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
+  - 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
+  - 🔥🔥🔥 [GLM-Z1-32B-0414](https://huggingface.co/THUDM/GLM-Z1-32B-0414)
+  - 🔥🔥🔥 [GLM-4-32B-0414](https://huggingface.co/THUDM/GLM-4-32B-0414)
   - 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
   - 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
   - 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
diff --git a/configs/dbgpt-local-qwen3.example.toml b/configs/dbgpt-local-qwen3.example.toml
new file mode 100644
index 000000000..31f644e70
--- /dev/null
+++ b/configs/dbgpt-local-qwen3.example.toml
@@ -0,0 +1,37 @@
+[system]
+# Load language from the environment variable (it is set by the hook)
+language = "${env:DBGPT_LANG:-zh}"
+api_keys = []
+encrypt_key = "your_secret_key"
+
+# Server Configurations
+[service.web]
+host = "0.0.0.0"
+port = 5670
+
+[service.web.database]
+type = "sqlite"
+path = "pilot/meta_data/dbgpt.db"
+
+[rag.storage]
+[rag.storage.vector]
+type = "chroma"
+persist_path = "pilot/data"
+
+# Model Configurations
+[models]
+[[models.llms]]
+name = "Qwen/Qwen3-14B"
+provider = "hf"
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
+# To force the model into non-thinking mode, set `reasoning_model` to false.
+# reasoning_model = false
+
+[[models.embeddings]]
+name = "BAAI/bge-large-zh-v1.5"
+provider = "hf"
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
diff --git a/docs/blog/2025-04-29-db-gpt-qwen3-support.md b/docs/blog/2025-04-29-db-gpt-qwen3-support.md
new file mode 100644
index 000000000..51bbc0117
--- /dev/null
+++ b/docs/blog/2025-04-29-db-gpt-qwen3-support.md
@@ -0,0 +1,95 @@
+---
+slug: db-gpt-qwen3-support
+title: DB-GPT Now Supports Qwen3 Series Models
+authors: fangyinc
+tags: [Qwen, Qwen3, LLM]
+---
+
+We are thrilled to announce that DB-GPT now supports inference with the Qwen3 series models!
+
+## Introducing Qwen3
+
+Qwen3 is the latest generation of large language models in the Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models.
+Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction following, agent capabilities, and multilingual support, with the following key features:
+
+- **Seamless switching between thinking mode** (for complex logical reasoning, math, and coding) and **non-thinking mode** (for efficient, general-purpose dialogue) **within a single model**, ensuring optimal performance across various scenarios.
+- **Significantly enhanced reasoning capabilities**, surpassing the previous QwQ models (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning.
+- **Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogue, and instruction following, delivering a more natural, engaging, and immersive conversational experience.
+- **Strong agent capabilities**, enabling precise integration with external tools in both thinking and non-thinking modes, and achieving leading performance among open-source models on complex agent-based tasks.
+- **Support for 100+ languages and dialects**, with strong capabilities for **multilingual instruction following** and **translation**.
+
+## How to Access Qwen3
+
+You can access the Qwen3 models on [Hugging Face](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) or [ModelScope](https://modelscope.cn/collections/Qwen3-9743180bdc6b48).
+
+## Using Qwen3 in DB-GPT
+
+Please read [Source Code Deployment](../docs/installation/sourcecode) to learn how to install DB-GPT from source.
+
+Qwen3 requires transformers >= 4.51.0, so please upgrade your transformers package first.
+
+Here is the command to install the required dependencies for Qwen3:
+
+```bash
+# Use uv to install the dependencies needed for Qwen3
+# Install core dependencies and select the desired extensions
+uv sync --all-packages \
+--extra "base" \
+--extra "cuda121" \
+--extra "hf" \
+--extra "rag" \
+--extra "storage_chromadb" \
+--extra "quant_bnb" \
+--extra "dbgpts" \
+--extra "hf_qwen3"
+```
+
+To run DB-GPT with a local Qwen3 model, you can provide a configuration file that specifies the model path and other parameters.
+Here is an example configuration file, `configs/dbgpt-local-qwen3.toml`:
+
+```toml
+# Model Configurations
+[models]
+[[models.llms]]
+name = "Qwen/Qwen3-14B"
+provider = "hf"
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
+
+[[models.embeddings]]
+name = "BAAI/bge-large-zh-v1.5"
+provider = "hf"
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
+```
+
+In the above configuration file, `[[models.llms]]` specifies the LLM and `[[models.embeddings]]` specifies the embedding model. If you do not provide the `path` parameter, the model will be downloaded from the Hugging Face model hub according to the `name` parameter.
+
+Then run the following command to start the webserver:
+
+```bash
+uv run dbgpt start webserver --config configs/dbgpt-local-qwen3.toml
+```
+
+Open your browser and visit `http://localhost:5670` to use the Qwen3 models in DB-GPT.
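+
+You can also call the model programmatically. The snippet below is a minimal sketch,
+assuming DB-GPT's OpenAI-compatible `/api/v2/chat/completions` endpoint and an API
+key `dbgpt` configured via `api_keys` in your toml file; check the API reference of
+your DB-GPT version for the exact path and authentication setup:
+
+```python
+import requests
+
+# Send a chat completion request to the local DB-GPT webserver
+resp = requests.post(
+    "http://localhost:5670/api/v2/chat/completions",
+    headers={"Authorization": "Bearer dbgpt"},
+    json={
+        "model": "Qwen/Qwen3-14B",
+        "messages": [{"role": "user", "content": "Hello, Qwen3!"}],
+        "stream": False,
+    },
+)
+print(resp.json())
+```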
+
+Enjoy the power of Qwen3 in DB-GPT!
+
+## Advanced Configurations
+
+> Seamless switching between thinking mode (for complex logical reasoning, math, and coding) and non-thinking mode (for efficient, general-purpose dialogue) within a single model, ensuring optimal performance across various scenarios.
+
+By default, Qwen3 runs with thinking capabilities enabled. If you want to disable them, set `reasoning_model = false` in your toml file:
+
+```toml
+[models]
+[[models.llms]]
+name = "Qwen/Qwen3-14B"
+provider = "hf"
+# Force the model into non-thinking mode
+reasoning_model = false
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
+```
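+
+Under the hood, DB-GPT's Hugging Face adapter forwards this setting to the
+tokenizer's chat template as `enable_thinking`. The snippet below is a minimal
+sketch of that behavior (assuming transformers >= 4.51.0, which ships the Qwen3
+chat template), handy if you want to inspect the rendered prompt yourself:
+
+```python
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-14B")
+messages = [{"role": "user", "content": "Explain SQL window functions."}]
+# `reasoning_model = false` in the toml file corresponds to enable_thinking=False
+prompt = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
+    enable_thinking=False,
+)
+print(prompt)
+```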
diff --git a/packages/dbgpt-core/pyproject.toml b/packages/dbgpt-core/pyproject.toml
index 499379e9f..353a810ce 100644
--- a/packages/dbgpt-core/pyproject.toml
+++ b/packages/dbgpt-core/pyproject.toml
@@ -155,6 +155,9 @@ hf_kimi = [
     "blobfile",
     "transformers<4.51.3",
 ]
+hf_qwen3 = [
+    "transformers>=4.51.0",
+]

 [build-system]
 requires = ["hatchling"]
@@ -174,6 +177,10 @@ conflicts = [
         { extra = "hf_glm4" },
         { extra = "hf_kimi" },
     ],
+    [
+        { extra = "hf_qwen3" },
+        { extra = "hf_kimi" },
+    ],
 ]

 [tool.hatch.build.targets.wheel]
 packages = ["src/dbgpt"]
diff --git a/packages/dbgpt-core/src/dbgpt/core/interface/media.py b/packages/dbgpt-core/src/dbgpt/core/interface/media.py
index 0b4b1100e..53caa4874 100644
--- a/packages/dbgpt-core/src/dbgpt/core/interface/media.py
+++ b/packages/dbgpt-core/src/dbgpt/core/interface/media.py
@@ -245,7 +245,10 @@ class MediaContent:

     @classmethod
     def to_chat_completion_message(
-        cls, role, content: Union[str, "MediaContent", List["MediaContent"]]
+        cls,
+        role,
+        content: Union[str, "MediaContent", List["MediaContent"]],
+        support_media_content: bool = True,
     ) -> ChatCompletionMessageParam:
         """Convert the media contents to chat completion message."""
         if not content:
@@ -255,6 +258,14 @@
         if isinstance(content, MediaContent):
             content = [content]
         new_content = [cls._parse_single_media_content(c) for c in content]
+        if not support_media_content:
+            text_content = [
+                c["text"] for c in new_content if c["type"] == "text" and "text" in c
+            ]
+            if not text_content:
+                raise ValueError("No text content found in the media contents")
+            # Media content is not supported: pass the first text part as a plain string
+            new_content = text_content[0]
         return {
             "role": role,
             "content": new_content,
diff --git a/packages/dbgpt-core/src/dbgpt/core/interface/message.py b/packages/dbgpt-core/src/dbgpt/core/interface/message.py
index 273734f23..6840c4a8c 100755
--- a/packages/dbgpt-core/src/dbgpt/core/interface/message.py
+++ b/packages/dbgpt-core/src/dbgpt/core/interface/message.py
@@ -407,6 +407,7 @@ class ModelMessage(BaseModel):
         messages: List["ModelMessage"],
         convert_to_compatible_format: bool = False,
         support_system_role: bool = True,
+        support_media_content: bool = True,
     ) -> List[Dict[str, str]]:
         """Cover to common message format.
@@ -418,6 +419,7 @@
             messages (List["ModelMessage"]): The model messages
             convert_to_compatible_format (bool): Whether to convert to compatible format
             support_system_role (bool): Whether to support system role
+            support_media_content (bool): Whether to support media content

         Returns:
             List[Dict[str, str]]: The common messages
@@ -430,7 +432,11 @@
         for message in messages:
             if message.role == ModelMessageRoleType.HUMAN:
                 history.append(
-                    MediaContent.to_chat_completion_message("user", message.content)
+                    MediaContent.to_chat_completion_message(
+                        "user",
+                        message.content,
+                        support_media_content=support_media_content,
+                    )
                 )
             elif message.role == ModelMessageRoleType.SYSTEM:
                 if not support_system_role:
@@ -440,6 +446,7 @@
                     MediaContent.to_chat_completion_message(
                         "system",
                         message.content,
+                        support_media_content=support_media_content,
                     )
                 )
             elif message.role == ModelMessageRoleType.AI:
                 history.append(
@@ -447,6 +454,7 @@
                     MediaContent.to_chat_completion_message(
                         "assistant",
                         message.content,
+                        support_media_content=support_media_content,
                     )
                 )
             else:
diff --git a/packages/dbgpt-core/src/dbgpt/model/adapter/base.py b/packages/dbgpt-core/src/dbgpt/model/adapter/base.py
index 219828d22..ffb75fee7 100644
--- a/packages/dbgpt-core/src/dbgpt/model/adapter/base.py
+++ b/packages/dbgpt-core/src/dbgpt/model/adapter/base.py
@@ -367,7 +367,10 @@ class LLMModelAdapter(ABC):
         return roles

     def transform_model_messages(
-        self, messages: List[ModelMessage], convert_to_compatible_format: bool = False
+        self,
+        messages: List[ModelMessage],
+        convert_to_compatible_format: bool = False,
+        support_media_content: bool = True,
     ) -> List[Dict[str, str]]:
         """Transform the model messages

@@ -392,6 +395,7 @@
             messages (List[ModelMessage]): The model messages
             convert_to_compatible_format (bool, optional): Whether to convert to
                 compatible format. Defaults to False.
+            support_media_content (bool, optional): Whether to support media
+                content. Defaults to True.

         Returns:
             List[Dict[str, str]]: The transformed model messages
         """
         logger.info(f"support_system_message: {self.support_system_message}")
         if not self.support_system_message and convert_to_compatible_format:
             # We will not do any transform in the future
-            return self._transform_to_no_system_messages(messages)
+            return self._transform_to_no_system_messages(
+                messages, support_media_content=support_media_content
+            )
         else:
             return ModelMessage.to_common_messages(
-                messages, convert_to_compatible_format=convert_to_compatible_format
+                messages,
+                convert_to_compatible_format=convert_to_compatible_format,
+                support_media_content=support_media_content,
             )

     def _transform_to_no_system_messages(
-        self, messages: List[ModelMessage]
+        self,
+        messages: List[ModelMessage],
+        support_media_content: bool = True,
     ) -> List[Dict[str, str]]:
         """Transform the model messages to no system messages

@@ -433,7 +443,9 @@
         Returns:
             List[Dict[str, str]]: The transformed model messages
         """
-        openai_messages = ModelMessage.to_common_messages(messages)
+        openai_messages = ModelMessage.to_common_messages(
+            messages, support_media_content=support_media_content
+        )
         system_messages = []
         return_messages = []
         for message in openai_messages:
diff --git a/packages/dbgpt-core/src/dbgpt/model/adapter/hf_adapter.py b/packages/dbgpt-core/src/dbgpt/model/adapter/hf_adapter.py
index a8492ee46..83e8aeaa0 100644
--- a/packages/dbgpt-core/src/dbgpt/model/adapter/hf_adapter.py
+++ b/packages/dbgpt-core/src/dbgpt/model/adapter/hf_adapter.py
@@ -600,6 +600,63 @@ class QwenMoeAdapter(NewHFChatModelAdapter):
         )


+class Qwen3Adapter(QwenAdapter):
+    support_4bit: bool = True
+    support_8bit: bool = True
+
+    def do_match(self, lower_model_name_or_path: Optional[str] = None):
+        # Match Qwen3 chat models, but skip the base (non-chat) checkpoints
+        return lower_model_name_or_path and (
+            "qwen3" in lower_model_name_or_path
+            and "base" not in lower_model_name_or_path
+        )
+
+    def check_transformer_version(self, current_version: str) -> None:
+        from packaging import version
+
+        # Compare versions numerically; a plain string comparison would mis-order
+        # releases such as 4.100.0 vs 4.51.0
+        if version.parse(current_version) < version.parse("4.51.0"):
+            raise ValueError(
+                "Qwen3 requires transformers>=4.51.0, please upgrade your"
+                " transformers package."
+            )
+
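+    # Thinking mode is enabled by default for Qwen3; it is turned off only when
+    # the deployment explicitly sets `reasoning_model = false`.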
+ ) + + def is_reasoning_model( + self, + deploy_model_params: LLMDeployModelParameters, + lower_model_name_or_path: Optional[str] = None, + ) -> bool: + if ( + deploy_model_params.reasoning_model is not None + and deploy_model_params.reasoning_model is False + ): + return False + return True + + def get_str_prompt( + self, + params: Dict, + messages: List[ModelMessage], + tokenizer: Any, + prompt_template: str = None, + convert_to_compatible_format: bool = False, + ) -> Optional[str]: + from transformers import AutoTokenizer + + if not tokenizer: + raise ValueError("tokenizer is is None") + tokenizer: AutoTokenizer = tokenizer + + is_reasoning_model = params.get("is_reasoning_model", True) + messages = self.transform_model_messages( + messages, convert_to_compatible_format, support_media_content=False + ) + logger.debug(f"The messages after transform: \n{messages}") + str_prompt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + enable_thinking=is_reasoning_model, + ) + return str_prompt + + class QwenOmniAdapter(NewHFChatModelAdapter): def do_match(self, lower_model_name_or_path: Optional[str] = None): return lower_model_name_or_path and ( @@ -997,6 +1054,7 @@ register_model_adapter(Gemma2Adapter) register_model_adapter(StarlingLMAdapter) register_model_adapter(QwenAdapter) register_model_adapter(QwenMoeAdapter) +register_model_adapter(Qwen3Adapter) register_model_adapter(QwenOmniAdapter) register_model_adapter(Llama3Adapter) register_model_adapter(Llama31Adapter) diff --git a/packages/dbgpt-core/src/dbgpt/model/cluster/worker/default_worker.py b/packages/dbgpt-core/src/dbgpt/model/cluster/worker/default_worker.py index 61a1c75b9..9136ea6fb 100644 --- a/packages/dbgpt-core/src/dbgpt/model/cluster/worker/default_worker.py +++ b/packages/dbgpt-core/src/dbgpt/model/cluster/worker/default_worker.py @@ -369,6 +369,13 @@ class DefaultModelWorker(ModelWorker): def _prepare_generate_stream( self, params: Dict, span_operation_name: str, is_stream=True ): + if self.llm_adapter.is_reasoning_model( + self._model_params, self.model_name.lower() + ): + params["is_reasoning_model"] = True + else: + params["is_reasoning_model"] = False + params, model_context = self.llm_adapter.model_adaptation( params, self.model_name, @@ -427,10 +434,6 @@ class DefaultModelWorker(ModelWorker): span_params["messages"] = list( map(lambda m: m.dict(), span_params["messages"]) ) - if self.llm_adapter.is_reasoning_model( - self._model_params, self.model_name.lower() - ): - params["is_reasoning_model"] = True metadata = { "is_async_func": self.support_async(),