From 2abd68d6c0aabc2cbd7d1089815dd268533706d4 Mon Sep 17 00:00:00 2001
From: Fangyin Cheng
Date: Tue, 29 Apr 2025 09:55:28 +0800
Subject: [PATCH] feat(model): Support Qwen3 models (#2664)

---
 README.ja.md                                  |  5 +
 README.md                                     |  5 +
 README.zh.md                                  |  5 +
 configs/dbgpt-local-qwen3.example.toml        | 37 ++++++++
 docs/blog/2025-04-29-db-gpt-qwen3-support.md  | 95 +++++++++++++++++++
 packages/dbgpt-core/pyproject.toml            |  7 ++
 .../src/dbgpt/core/interface/media.py         | 13 ++-
 .../src/dbgpt/core/interface/message.py       | 10 +-
 .../src/dbgpt/model/adapter/base.py           | 22 ++++-
 .../src/dbgpt/model/adapter/hf_adapter.py     | 58 +++++++++++
 .../model/cluster/worker/default_worker.py    | 11 ++-
 11 files changed, 257 insertions(+), 11 deletions(-)
 create mode 100644 configs/dbgpt-local-qwen3.example.toml
 create mode 100644 docs/blog/2025-04-29-db-gpt-qwen3-support.md

diff --git a/README.ja.md b/README.ja.md
index d1f90faf6..031081e76 100644
--- a/README.ja.md
+++ b/README.ja.md
@@ -162,6 +162,11 @@ DB-GPTのアーキテクチャは以下の図に示されています:
 私たちは、LLaMA/LLaMA2、Baichuan、ChatGLM、Wenxin、Tongyi、Zhipuなど、オープンソースおよびAPIエージェントからの数十の大規模言語モデル(LLM)を含む幅広いモデルをサポートしています。

 - ニュース
+  - 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
+  - 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
+  - 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
+  - 🔥🔥🔥 [GLM-Z1-32B-0414](https://huggingface.co/THUDM/GLM-Z1-32B-0414)
+  - 🔥🔥🔥 [GLM-4-32B-0414](https://huggingface.co/THUDM/GLM-4-32B-0414)
   - 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
   - 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
   - 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
diff --git a/README.md b/README.md
index 12539c8d1..eb1766e1a 100644
--- a/README.md
+++ b/README.md
@@ -169,6 +169,11 @@ At present, we have introduced several key features to showcase our current capa
 We offer extensive model support, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and many more.

 - News
+  - 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
+  - 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
+  - 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
+  - 🔥🔥🔥 [GLM-Z1-32B-0414](https://huggingface.co/THUDM/GLM-Z1-32B-0414)
+  - 🔥🔥🔥 [GLM-4-32B-0414](https://huggingface.co/THUDM/GLM-4-32B-0414)
   - 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
   - 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
   - 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
diff --git a/README.zh.md b/README.zh.md
index 1cfc2dae1..cbeab84d9 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -163,6 +163,11 @@
 海量模型支持,包括开源、API代理等几十种大语言模型。如LLaMA/LLaMA2、Baichuan、ChatGLM、文心、通义、智谱等。当前已支持如下模型:

 - 新增支持模型
+  - 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
+  - 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
+  - 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
+  - 🔥🔥🔥 [GLM-Z1-32B-0414](https://huggingface.co/THUDM/GLM-Z1-32B-0414)
+  - 🔥🔥🔥 [GLM-4-32B-0414](https://huggingface.co/THUDM/GLM-4-32B-0414)
   - 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
   - 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
   - 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
diff --git a/configs/dbgpt-local-qwen3.example.toml b/configs/dbgpt-local-qwen3.example.toml
new file mode 100644
index 000000000..31f644e70
--- /dev/null
+++ b/configs/dbgpt-local-qwen3.example.toml
@@ -0,0 +1,37 @@
+[system]
+# Load language from the environment variable (it is set by the hook)
+language = "${env:DBGPT_LANG:-zh}"
+api_keys = []
+encrypt_key = "your_secret_key"
+
+# Server Configurations
+[service.web]
+host = "0.0.0.0"
+port = 5670
+
+[service.web.database]
+type = "sqlite"
+path = "pilot/meta_data/dbgpt.db"
+
+[rag.storage]
+[rag.storage.vector]
+type = "chroma"
+persist_path = "pilot/data"
+
+# Model Configurations
+[models]
+[[models.llms]]
+name = "Qwen/Qwen3-14B"
+provider = "hf"
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
+# To force the model into non-thinking mode, set `reasoning_model` to false.
+# reasoning_model = false
+
+[[models.embeddings]]
+name = "BAAI/bge-large-zh-v1.5"
+provider = "hf"
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
diff --git a/docs/blog/2025-04-29-db-gpt-qwen3-support.md b/docs/blog/2025-04-29-db-gpt-qwen3-support.md
new file mode 100644
index 000000000..51bbc0117
--- /dev/null
+++ b/docs/blog/2025-04-29-db-gpt-qwen3-support.md
@@ -0,0 +1,95 @@
+---
+slug: db-gpt-qwen3-support
+title: DB-GPT Now Supports Qwen3 Series Models
+authors: fangyinc
+tags: [Qwen, Qwen3, LLM]
+---
+
+We are thrilled to announce that DB-GPT now supports inference with the Qwen3 series models!
+
+## Introducing Qwen3
+
+Qwen3 is the latest generation of large language models in the Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models.
+Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction following, agent capabilities, and multilingual support, with the following key features:
+
+- **Seamless switching between thinking mode** (for complex logical reasoning, math, and coding) and **non-thinking mode** (for efficient, general-purpose dialogue) **within a single model**, ensuring optimal performance across various scenarios.
+- **Significantly enhanced reasoning capabilities**, surpassing the previous QwQ models (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning.
+- **Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogue, and instruction following, delivering a more natural, engaging, and immersive conversational experience.
+- **Strong agent capabilities**, enabling precise integration with external tools in both thinking and non-thinking modes, and achieving leading performance among open-source models on complex agent-based tasks.
+- **Support for 100+ languages and dialects**, with strong capabilities for **multilingual instruction following** and **translation**.
+
+## How to Access Qwen3
+
+You can access the Qwen3 models on [Hugging Face](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) or [ModelScope](https://modelscope.cn/collections/Qwen3-9743180bdc6b48).
+
+## Using Qwen3 in DB-GPT
+
+Please read [Source Code Deployment](../docs/installation/sourcecode) to learn how to install DB-GPT from source.
+
+Qwen3 requires transformers >= 4.51.0, so please upgrade your transformers package first.
+
+Here is the command to install the required dependencies for Qwen3:
+
+```bash
+# Use uv to install the dependencies needed for Qwen3
+# Install core dependencies and select the desired extensions
+uv sync --all-packages \
+--extra "base" \
+--extra "cuda121" \
+--extra "hf" \
+--extra "rag" \
+--extra "storage_chromadb" \
+--extra "quant_bnb" \
+--extra "dbgpts" \
+--extra "hf_qwen3"
+```
+
+To run DB-GPT with a local Qwen3 model, you can provide a configuration file that specifies the model path and other parameters.
+Here is an example configuration file, `configs/dbgpt-local-qwen3.toml`:
+
+```toml
+# Model Configurations
+[models]
+[[models.llms]]
+name = "Qwen/Qwen3-14B"
+provider = "hf"
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
+
+[[models.embeddings]]
+name = "BAAI/bge-large-zh-v1.5"
+provider = "hf"
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
+```
+
+In the above configuration file, `[[models.llms]]` specifies the LLM and `[[models.embeddings]]` specifies the embedding model. If you do not provide the `path` parameter, the model will be downloaded from the Hugging Face model hub according to the `name` parameter.
+
+Then run the following command to start the webserver:
+
+```bash
+uv run dbgpt start webserver --config configs/dbgpt-local-qwen3.toml
+```
+
+Open your browser and visit `http://localhost:5670` to use the Qwen3 models in DB-GPT.
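+
+You can also call the model programmatically. The snippet below is a minimal sketch,
+assuming DB-GPT's OpenAI-compatible `/api/v2/chat/completions` endpoint and an API
+key `dbgpt` configured via `api_keys` in your toml file; check the API reference of
+your DB-GPT version for the exact path and authentication setup:
+
+```python
+import requests
+
+# Send a chat completion request to the local DB-GPT webserver
+resp = requests.post(
+    "http://localhost:5670/api/v2/chat/completions",
+    headers={"Authorization": "Bearer dbgpt"},
+    json={
+        "model": "Qwen/Qwen3-14B",
+        "messages": [{"role": "user", "content": "Hello, Qwen3!"}],
+        "stream": False,
+    },
+)
+print(resp.json())
+```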
+
+Enjoy the power of Qwen3 in DB-GPT!
+
+## Advanced Configurations
+
+> Seamless switching between thinking mode (for complex logical reasoning, math, and coding) and non-thinking mode (for efficient, general-purpose dialogue) within a single model, ensuring optimal performance across various scenarios.
+
+By default, Qwen3 runs with thinking capabilities enabled. If you want to disable them, set `reasoning_model = false` in your toml file:
+
+```toml
+[models]
+[[models.llms]]
+name = "Qwen/Qwen3-14B"
+provider = "hf"
+# Force the model into non-thinking mode
+reasoning_model = false
+# If `path` is not provided, the model will be downloaded from the Hugging Face model hub.
+# Uncomment the following line to specify a model path in the local file system.
+# path = "the-model-path-in-the-local-file-system"
+```
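+
+Under the hood, DB-GPT's Hugging Face adapter forwards this setting to the
+tokenizer's chat template as `enable_thinking`. The snippet below is a minimal
+sketch of that behavior (assuming transformers >= 4.51.0, which ships the Qwen3
+chat template), handy if you want to inspect the rendered prompt yourself:
+
+```python
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-14B")
+messages = [{"role": "user", "content": "Explain SQL window functions."}]
+# `reasoning_model = false` in the toml file corresponds to enable_thinking=False
+prompt = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
+    enable_thinking=False,
+)
+print(prompt)
+```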
diff --git a/packages/dbgpt-core/pyproject.toml b/packages/dbgpt-core/pyproject.toml
index 499379e9f..353a810ce 100644
--- a/packages/dbgpt-core/pyproject.toml
+++ b/packages/dbgpt-core/pyproject.toml
@@ -155,6 +155,9 @@ hf_kimi = [
     "blobfile",
     "transformers<4.51.3",
 ]
+hf_qwen3 = [
+    "transformers>=4.51.0",
+]

 [build-system]
 requires = ["hatchling"]
@@ -174,6 +177,10 @@ conflicts = [
         { extra = "hf_glm4" },
         { extra = "hf_kimi" },
     ],
+    [
+        { extra = "hf_qwen3" },
+        { extra = "hf_kimi" },
+    ],
 ]

 [tool.hatch.build.targets.wheel]
 packages = ["src/dbgpt"]
diff --git a/packages/dbgpt-core/src/dbgpt/core/interface/media.py b/packages/dbgpt-core/src/dbgpt/core/interface/media.py
index 0b4b1100e..53caa4874 100644
--- a/packages/dbgpt-core/src/dbgpt/core/interface/media.py
+++ b/packages/dbgpt-core/src/dbgpt/core/interface/media.py
@@ -245,7 +245,10 @@ class MediaContent:

     @classmethod
     def to_chat_completion_message(
-        cls, role, content: Union[str, "MediaContent", List["MediaContent"]]
+        cls,
+        role,
+        content: Union[str, "MediaContent", List["MediaContent"]],
+        support_media_content: bool = True,
     ) -> ChatCompletionMessageParam:
         """Convert the media contents to chat completion message."""
         if not content:
@@ -255,6 +258,14 @@
         if isinstance(content, MediaContent):
             content = [content]
         new_content = [cls._parse_single_media_content(c) for c in content]
+        if not support_media_content:
+            text_content = [
+                c["text"] for c in new_content if c["type"] == "text" and "text" in c
+            ]
+            if not text_content:
+                raise ValueError("No text content found in the media contents")
+            # Media content is not supported: pass the first text part as a plain string
+            new_content = text_content[0]
         return {
             "role": role,
             "content": new_content,
diff --git a/packages/dbgpt-core/src/dbgpt/core/interface/message.py b/packages/dbgpt-core/src/dbgpt/core/interface/message.py
index 273734f23..6840c4a8c 100755
--- a/packages/dbgpt-core/src/dbgpt/core/interface/message.py
+++ b/packages/dbgpt-core/src/dbgpt/core/interface/message.py
@@ -407,6 +407,7 @@ class ModelMessage(BaseModel):
         messages: List["ModelMessage"],
         convert_to_compatible_format: bool = False,
         support_system_role: bool = True,
+        support_media_content: bool = True,
     ) -> List[Dict[str, str]]:
         """Cover to common message format.
@@ -418,6 +419,7 @@
             messages (List["ModelMessage"]): The model messages
             convert_to_compatible_format (bool): Whether to convert to compatible format
             support_system_role (bool): Whether to support system role
+            support_media_content (bool): Whether to support media content

         Returns:
             List[Dict[str, str]]: The common messages
@@ -430,7 +432,11 @@
         for message in messages:
             if message.role == ModelMessageRoleType.HUMAN:
                 history.append(
-                    MediaContent.to_chat_completion_message("user", message.content)
+                    MediaContent.to_chat_completion_message(
+                        "user",
+                        message.content,
+                        support_media_content=support_media_content,
+                    )
                 )
             elif message.role == ModelMessageRoleType.SYSTEM:
                 if not support_system_role:
@@ -440,6 +446,7 @@
                     MediaContent.to_chat_completion_message(
                         "system",
                         message.content,
+                        support_media_content=support_media_content,
                     )
                 )
             elif message.role == ModelMessageRoleType.AI:
                 history.append(
@@ -447,6 +454,7 @@
                     MediaContent.to_chat_completion_message(
                         "assistant",
                         message.content,
+                        support_media_content=support_media_content,
                     )
                 )
             else:
diff --git a/packages/dbgpt-core/src/dbgpt/model/adapter/base.py b/packages/dbgpt-core/src/dbgpt/model/adapter/base.py
index 219828d22..ffb75fee7 100644
--- a/packages/dbgpt-core/src/dbgpt/model/adapter/base.py
+++ b/packages/dbgpt-core/src/dbgpt/model/adapter/base.py
@@ -367,7 +367,10 @@ class LLMModelAdapter(ABC):
         return roles

     def transform_model_messages(
-        self, messages: List[ModelMessage], convert_to_compatible_format: bool = False
+        self,
+        messages: List[ModelMessage],
+        convert_to_compatible_format: bool = False,
+        support_media_content: bool = True,
     ) -> List[Dict[str, str]]:
         """Transform the model messages

@@ -392,6 +395,7 @@
             messages (List[ModelMessage]): The model messages
             convert_to_compatible_format (bool, optional): Whether to convert to
                 compatible format. Defaults to False.
+            support_media_content (bool, optional): Whether to support media
+                content. Defaults to True.

         Returns:
             List[Dict[str, str]]: The transformed model messages
         """
         logger.info(f"support_system_message: {self.support_system_message}")
         if not self.support_system_message and convert_to_compatible_format:
             # We will not do any transform in the future
-            return self._transform_to_no_system_messages(messages)
+            return self._transform_to_no_system_messages(
+                messages, support_media_content=support_media_content
+            )
         else:
             return ModelMessage.to_common_messages(
-                messages, convert_to_compatible_format=convert_to_compatible_format
+                messages,
+                convert_to_compatible_format=convert_to_compatible_format,
+                support_media_content=support_media_content,
             )

     def _transform_to_no_system_messages(
-        self, messages: List[ModelMessage]
+        self,
+        messages: List[ModelMessage],
+        support_media_content: bool = True,
     ) -> List[Dict[str, str]]:
         """Transform the model messages to no system messages

@@ -433,7 +443,9 @@
         Returns:
             List[Dict[str, str]]: The transformed model messages
         """
-        openai_messages = ModelMessage.to_common_messages(messages)
+        openai_messages = ModelMessage.to_common_messages(
+            messages, support_media_content=support_media_content
+        )
         system_messages = []
         return_messages = []
         for message in openai_messages:
diff --git a/packages/dbgpt-core/src/dbgpt/model/adapter/hf_adapter.py b/packages/dbgpt-core/src/dbgpt/model/adapter/hf_adapter.py
index a8492ee46..83e8aeaa0 100644
--- a/packages/dbgpt-core/src/dbgpt/model/adapter/hf_adapter.py
+++ b/packages/dbgpt-core/src/dbgpt/model/adapter/hf_adapter.py
@@ -600,6 +600,63 @@ class QwenMoeAdapter(NewHFChatModelAdapter):
         )


+class Qwen3Adapter(QwenAdapter):
+    support_4bit: bool = True
+    support_8bit: bool = True
+
+    def do_match(self, lower_model_name_or_path: Optional[str] = None):
+        # Match Qwen3 chat models, but skip the base (non-chat) checkpoints
+        return lower_model_name_or_path and (
+            "qwen3" in lower_model_name_or_path
+            and "base" not in lower_model_name_or_path
+        )
+
+    def check_transformer_version(self, current_version: str) -> None:
+        from packaging import version
+
+        # Compare versions numerically; a plain string comparison would mis-order
+        # releases such as 4.100.0 vs 4.51.0
+        if version.parse(current_version) < version.parse("4.51.0"):
+            raise ValueError(
+                "Qwen3 requires transformers>=4.51.0, please upgrade your"
+                " transformers package."
+            )
+
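+    # Thinking mode is enabled by default for Qwen3; it is turned off only when
+    # the deployment explicitly sets `reasoning_model = false`.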
+ ) + + def is_reasoning_model( + self, + deploy_model_params: LLMDeployModelParameters, + lower_model_name_or_path: Optional[str] = None, + ) -> bool: + if ( + deploy_model_params.reasoning_model is not None + and deploy_model_params.reasoning_model is False + ): + return False + return True + + def get_str_prompt( + self, + params: Dict, + messages: List[ModelMessage], + tokenizer: Any, + prompt_template: str = None, + convert_to_compatible_format: bool = False, + ) -> Optional[str]: + from transformers import AutoTokenizer + + if not tokenizer: + raise ValueError("tokenizer is is None") + tokenizer: AutoTokenizer = tokenizer + + is_reasoning_model = params.get("is_reasoning_model", True) + messages = self.transform_model_messages( + messages, convert_to_compatible_format, support_media_content=False + ) + logger.debug(f"The messages after transform: \n{messages}") + str_prompt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + enable_thinking=is_reasoning_model, + ) + return str_prompt + + class QwenOmniAdapter(NewHFChatModelAdapter): def do_match(self, lower_model_name_or_path: Optional[str] = None): return lower_model_name_or_path and ( @@ -997,6 +1054,7 @@ register_model_adapter(Gemma2Adapter) register_model_adapter(StarlingLMAdapter) register_model_adapter(QwenAdapter) register_model_adapter(QwenMoeAdapter) +register_model_adapter(Qwen3Adapter) register_model_adapter(QwenOmniAdapter) register_model_adapter(Llama3Adapter) register_model_adapter(Llama31Adapter) diff --git a/packages/dbgpt-core/src/dbgpt/model/cluster/worker/default_worker.py b/packages/dbgpt-core/src/dbgpt/model/cluster/worker/default_worker.py index 61a1c75b9..9136ea6fb 100644 --- a/packages/dbgpt-core/src/dbgpt/model/cluster/worker/default_worker.py +++ b/packages/dbgpt-core/src/dbgpt/model/cluster/worker/default_worker.py @@ -369,6 +369,13 @@ class DefaultModelWorker(ModelWorker): def _prepare_generate_stream( self, params: Dict, span_operation_name: str, is_stream=True ): + if self.llm_adapter.is_reasoning_model( + self._model_params, self.model_name.lower() + ): + params["is_reasoning_model"] = True + else: + params["is_reasoning_model"] = False + params, model_context = self.llm_adapter.model_adaptation( params, self.model_name, @@ -427,10 +434,6 @@ class DefaultModelWorker(ModelWorker): span_params["messages"] = list( map(lambda m: m.dict(), span_params["messages"]) ) - if self.llm_adapter.is_reasoning_model( - self._model_params, self.model_name.lower() - ): - params["is_reasoning_model"] = True metadata = { "is_async_func": self.support_async(),