Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-08-07 03:14:42 +00:00)

feat(model): Support Qwen3 models (#2664)

This commit is contained in:
parent bcb43266cf
commit 2abd68d6c0
@@ -162,6 +162,11 @@ The architecture of DB-GPT is shown in the following figure:
 We support a wide range of models, including dozens of large language models (LLMs) from open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and more.
 
 - News
+- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
+- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
+- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
+- 🔥🔥🔥 [GLM-Z1-32B-0414](https://huggingface.co/THUDM/GLM-Z1-32B-0414)
+- 🔥🔥🔥 [GLM-4-32B-0414](https://huggingface.co/THUDM/GLM-4-32B-0414)
 - 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
 - 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
 - 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
@@ -169,6 +169,11 @@ At present, we have introduced several key features to showcase our current capabilities:
 We offer extensive model support, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and many more.
 
 - News
+- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
+- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
+- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
+- 🔥🔥🔥 [GLM-Z1-32B-0414](https://huggingface.co/THUDM/GLM-Z1-32B-0414)
+- 🔥🔥🔥 [GLM-4-32B-0414](https://huggingface.co/THUDM/GLM-4-32B-0414)
 - 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
 - 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
 - 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
@@ -163,6 +163,11 @@
 Extensive model support: dozens of large language models from open-source and API proxies, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and more. The following models are currently supported:
 
 - Newly supported models
+- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
+- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
+- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
+- 🔥🔥🔥 [GLM-Z1-32B-0414](https://huggingface.co/THUDM/GLM-Z1-32B-0414)
+- 🔥🔥🔥 [GLM-4-32B-0414](https://huggingface.co/THUDM/GLM-4-32B-0414)
 - 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
 - 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
 - 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
configs/dbgpt-local-qwen3.example.toml (new file, 37 lines)
@@ -0,0 +1,37 @@
[system]
# Load language from environment variable(It is set by the hook)
language = "${env:DBGPT_LANG:-zh}"
api_keys = []
encrypt_key = "your_secret_key"

# Server Configurations
[service.web]
host = "0.0.0.0"
port = 5670

[service.web.database]
type = "sqlite"
path = "pilot/meta_data/dbgpt.db"

[rag.storage]
[rag.storage.vector]
type = "chroma"
persist_path = "pilot/data"

# Model Configurations
[models]
[[models.llms]]
name = "Qwen/Qwen3-14B"
provider = "hf"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# path = "the-model-path-in-the-local-file-system"
# Force the model to be used in non-thinking mode, set to false
# reasoning_model = false

[[models.embeddings]]
name = "BAAI/bge-large-zh-v1.5"
provider = "hf"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# path = "the-model-path-in-the-local-file-system"
docs/blog/2025-04-29-db-gpt-qwen3-support.md (new file, 95 lines)

@@ -0,0 +1,95 @@
---
slug: db-gpt-qwen3-support
title: DB-GPT Now Supports Qwen3 Series Models
authors: fangyinc
tags: [Qwen, Qwen3, LLM]
---

We are thrilled to announce that DB-GPT now supports inference with the Qwen3 series models!

## Introducing Qwen3

Qwen3 is the latest generation of large language models in the Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction following, agent capabilities, and multilingual support, with the following key features:

- **Unique support for seamless switching between thinking mode** (for complex logical reasoning, math, and coding) **and non-thinking mode** (for efficient, general-purpose dialogue) **within a single model**, ensuring optimal performance across various scenarios.
- **Significantly enhanced reasoning capabilities**, surpassing the previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning.
- **Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogue, and instruction following, for a more natural, engaging, and immersive conversational experience.
- **Expertise in agent capabilities**, enabling precise integration with external tools in both thinking and non-thinking modes and achieving leading performance among open-source models on complex agent-based tasks.
- **Support for 100+ languages and dialects**, with strong capabilities for **multilingual instruction following** and **translation**.

## How to Access Qwen3

You can access the Qwen3 models on [Hugging Face](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) or [ModelScope](https://modelscope.cn/collections/Qwen3-9743180bdc6b48).

## Using Qwen3 in DB-GPT

Please read [Source Code Deployment](../docs/installation/sourcecode) to learn how to install DB-GPT from source code.

Qwen3 requires transformers >= 4.51.0, so please upgrade your transformers package first.

Here is the command to install the required dependencies for Qwen3:

```bash
# Use uv to install the dependencies needed for Qwen3
# Install core dependencies and select desired extensions
uv sync --all-packages \
  --extra "base" \
  --extra "cuda121" \
  --extra "hf" \
  --extra "rag" \
  --extra "storage_chromadb" \
  --extra "quant_bnb" \
  --extra "dbgpts" \
  --extra "hf_qwen3"
```

To run DB-GPT with a local Qwen3 model, you can provide a configuration file that specifies the model path and other parameters. Here is an example configuration file, `configs/dbgpt-local-qwen3.toml`:

```toml
# Model Configurations
[models]
[[models.llms]]
name = "Qwen/Qwen3-14B"
provider = "hf"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# path = "the-model-path-in-the-local-file-system"

[[models.embeddings]]
name = "BAAI/bge-large-zh-v1.5"
provider = "hf"
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# path = "the-model-path-in-the-local-file-system"
```

In the above configuration file, `[[models.llms]]` specifies the LLM and `[[models.embeddings]]` specifies the embedding model. If you do not provide the `path` parameter, the model is downloaded from the Hugging Face model hub according to the `name` parameter.
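If you prefer to manage the model weights yourself, you can pre-download them and point the `path` option at the result. Below is a minimal sketch that uses `huggingface_hub` directly; the target directory is only an illustration, not a DB-GPT convention.

```python
# Pre-download the Qwen3 weights so DB-GPT can load them from a local path.
# Requires the huggingface_hub package; local_dir is an arbitrary example location.
from huggingface_hub import snapshot_download

local_path = snapshot_download(
    repo_id="Qwen/Qwen3-14B",
    local_dir="./models/Qwen3-14B",  # hypothetical directory; choose your own
)
print("Point `path` under [[models.llms]] at:", local_path)
```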
Then run the following command to start the webserver:

```bash
uv run dbgpt start webserver --config configs/dbgpt-local-qwen3.toml
```

Open your browser and visit `http://localhost:5670` to use the Qwen3 models in DB-GPT.

Enjoy the power of Qwen3 in DB-GPT!

## Advanced Configurations

> Unique support for seamless switching between thinking mode (for complex logical reasoning, math, and coding) and non-thinking mode (for efficient, general-purpose dialogue) within a single model, ensuring optimal performance across various scenarios.

By default, Qwen3 runs with its thinking capabilities enabled. If you want to disable them, set `reasoning_model = false` in your TOML file:

```toml
[models]
[[models.llms]]
name = "Qwen/Qwen3-14B"
provider = "hf"
# Force the model to be used in non-thinking mode
reasoning_model = false
# If not provided, the model will be downloaded from the Hugging Face model hub
# uncomment the following line to specify the model path in the local file system
# path = "the-model-path-in-the-local-file-system"
```
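Under the hood, the adapter forwards this flag to the tokenizer's chat template as `enable_thinking` (see the `Qwen3Adapter.get_str_prompt` change later in this commit). A minimal standalone sketch of the effect, assuming transformers >= 4.51.0 and access to the `Qwen/Qwen3-14B` tokenizer:

```python
# Compare the prompts produced with thinking enabled vs. disabled.
# This only renders the prompt text; it does not run the model.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-14B")
messages = [{"role": "user", "content": "Briefly explain what DB-GPT is."}]

# Default behaviour (reasoning_model unset or true): thinking stays enabled.
thinking_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=True
)

# With `reasoning_model = false`, DB-GPT passes enable_thinking=False instead,
# so the prompt is rendered in non-thinking mode.
direct_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
)

print(thinking_prompt != direct_prompt)  # True: the two rendered prompts differ
```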
@@ -155,6 +155,9 @@ hf_kimi = [
     "blobfile",
     "transformers<4.51.3",
 ]
+hf_qwen3 = [
+    "transformers>=4.51.0",
+]
 
 [build-system]
 requires = ["hatchling"]
@@ -174,6 +177,10 @@ conflicts = [
         { extra = "hf_glm4" },
         { extra = "hf_kimi" },
     ],
+    [
+        { extra = "hf_qwen3" },
+        { extra = "hf_kimi" },
+    ],
 ]
 [tool.hatch.build.targets.wheel]
 packages = ["src/dbgpt"]
@@ -245,7 +245,10 @@ class MediaContent:
 
     @classmethod
     def to_chat_completion_message(
-        cls, role, content: Union[str, "MediaContent", List["MediaContent"]]
+        cls,
+        role,
+        content: Union[str, "MediaContent", List["MediaContent"]],
+        support_media_content: bool = True,
     ) -> ChatCompletionMessageParam:
         """Convert the media contents to chat completion message."""
         if not content:
@@ -255,6 +258,14 @@ class MediaContent:
         if isinstance(content, MediaContent):
             content = [content]
         new_content = [cls._parse_single_media_content(c) for c in content]
+        if not support_media_content:
+            text_content = [
+                c["text"] for c in new_content if c["type"] == "text" and "text" in c
+            ]
+            if not text_content:
+                raise ValueError("No text content found in the media contents")
+            # Not support media content, just pass the string text as content
+            new_content = text_content[0]
         return {
             "role": role,
             "content": new_content,
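For context, the `support_media_content=False` branch added above keeps only the first text part of a message and passes it on as a plain string. A simplified standalone sketch (plain dicts instead of the real `MediaContent` parsing; the function name is illustrative):

```python
# Simplified illustration of the support_media_content=False path: keep only the
# first text part and return it as a plain string instead of a list of content parts.
from typing import Any, Dict, List

def to_text_only_message(role: str, parts: List[Dict[str, Any]]) -> Dict[str, Any]:
    text_parts = [p["text"] for p in parts if p.get("type") == "text" and "text" in p]
    if not text_parts:
        raise ValueError("No text content found in the media contents")
    return {"role": role, "content": text_parts[0]}

msg = to_text_only_message(
    "user",
    [
        {"type": "text", "text": "Hello, Qwen3!"},
        {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
    ],
)
print(msg)  # {'role': 'user', 'content': 'Hello, Qwen3!'}
```

This matters for backends whose chat templates expect `content` to be a plain string, which is why `Qwen3Adapter.get_str_prompt` later in this commit calls `transform_model_messages(..., support_media_content=False)`.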
@@ -407,6 +407,7 @@ class ModelMessage(BaseModel):
         messages: List["ModelMessage"],
         convert_to_compatible_format: bool = False,
         support_system_role: bool = True,
+        support_media_content: bool = True,
     ) -> List[Dict[str, str]]:
         """Cover to common message format.
 
@@ -418,6 +419,7 @@ class ModelMessage(BaseModel):
             messages (List["ModelMessage"]): The model messages
             convert_to_compatible_format (bool): Whether to convert to compatible format
             support_system_role (bool): Whether to support system role
+            support_media_content (bool): Whether to support media content
 
         Returns:
             List[Dict[str, str]]: The common messages
@@ -430,7 +432,11 @@ class ModelMessage(BaseModel):
         for message in messages:
             if message.role == ModelMessageRoleType.HUMAN:
                 history.append(
-                    MediaContent.to_chat_completion_message("user", message.content)
+                    MediaContent.to_chat_completion_message(
+                        "user",
+                        message.content,
+                        support_media_content=support_media_content,
+                    )
                 )
             elif message.role == ModelMessageRoleType.SYSTEM:
                 if not support_system_role:
@@ -440,6 +446,7 @@ class ModelMessage(BaseModel):
                     MediaContent.to_chat_completion_message(
                         "system",
                         message.content,
+                        support_media_content=support_media_content,
                     )
                 )
             elif message.role == ModelMessageRoleType.AI:
@@ -447,6 +454,7 @@ class ModelMessage(BaseModel):
                     MediaContent.to_chat_completion_message(
                         "assistant",
                         message.content,
+                        support_media_content=support_media_content,
                     )
                 )
             else:
@@ -367,7 +367,10 @@ class LLMModelAdapter(ABC):
         return roles
 
     def transform_model_messages(
-        self, messages: List[ModelMessage], convert_to_compatible_format: bool = False
+        self,
+        messages: List[ModelMessage],
+        convert_to_compatible_format: bool = False,
+        support_media_content: bool = True,
     ) -> List[Dict[str, str]]:
         """Transform the model messages
 
@@ -392,6 +395,7 @@ class LLMModelAdapter(ABC):
             messages (List[ModelMessage]): The model messages
             convert_to_compatible_format (bool, optional): Whether to convert to
                 compatible format. Defaults to False.
+            support_media_content (bool, optional): Whether to support media content
 
         Returns:
             List[Dict[str, str]]: The transformed model messages
@@ -399,14 +403,20 @@ class LLMModelAdapter(ABC):
         logger.info(f"support_system_message: {self.support_system_message}")
         if not self.support_system_message and convert_to_compatible_format:
             # We will not do any transform in the future
-            return self._transform_to_no_system_messages(messages)
+            return self._transform_to_no_system_messages(
+                messages, support_media_content=support_media_content
+            )
         else:
             return ModelMessage.to_common_messages(
-                messages, convert_to_compatible_format=convert_to_compatible_format
+                messages,
+                convert_to_compatible_format=convert_to_compatible_format,
+                support_media_content=support_media_content,
             )
 
     def _transform_to_no_system_messages(
-        self, messages: List[ModelMessage]
+        self,
+        messages: List[ModelMessage],
+        support_media_content: bool = True,
     ) -> List[Dict[str, str]]:
         """Transform the model messages to no system messages
 
@@ -433,7 +443,9 @@ class LLMModelAdapter(ABC):
         Returns:
             List[Dict[str, str]]: The transformed model messages
         """
-        openai_messages = ModelMessage.to_common_messages(messages)
+        openai_messages = ModelMessage.to_common_messages(
+            messages, support_media_content=support_media_content
+        )
         system_messages = []
         return_messages = []
         for message in openai_messages:
@@ -600,6 +600,63 @@ class QwenMoeAdapter(NewHFChatModelAdapter):
         )
 
 
+class Qwen3Adapter(QwenAdapter):
+    support_4bit: bool = True
+    support_8bit: bool = True
+
+    def do_match(self, lower_model_name_or_path: Optional[str] = None):
+        return lower_model_name_or_path and (
+            "qwen3" in lower_model_name_or_path
+            and "base" not in lower_model_name_or_path
+        )
+
+    def check_transformer_version(self, current_version: str) -> None:
+        if not current_version >= "4.51.0":
+            raise ValueError(
+                "Qwen3 requires transformers.__version__>=4.51.0, please upgrade your"
+                " transformers package."
+            )
+
+    def is_reasoning_model(
+        self,
+        deploy_model_params: LLMDeployModelParameters,
+        lower_model_name_or_path: Optional[str] = None,
+    ) -> bool:
+        if (
+            deploy_model_params.reasoning_model is not None
+            and deploy_model_params.reasoning_model is False
+        ):
+            return False
+        return True
+
+    def get_str_prompt(
+        self,
+        params: Dict,
+        messages: List[ModelMessage],
+        tokenizer: Any,
+        prompt_template: str = None,
+        convert_to_compatible_format: bool = False,
+    ) -> Optional[str]:
+        from transformers import AutoTokenizer
+
+        if not tokenizer:
+            raise ValueError("tokenizer is None")
+        tokenizer: AutoTokenizer = tokenizer
+
+        is_reasoning_model = params.get("is_reasoning_model", True)
+        messages = self.transform_model_messages(
+            messages, convert_to_compatible_format, support_media_content=False
+        )
+        logger.debug(f"The messages after transform: \n{messages}")
+        str_prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=is_reasoning_model,
+        )
+        return str_prompt
+
+
 class QwenOmniAdapter(NewHFChatModelAdapter):
     def do_match(self, lower_model_name_or_path: Optional[str] = None):
         return lower_model_name_or_path and (
@@ -997,6 +1054,7 @@ register_model_adapter(Gemma2Adapter)
 register_model_adapter(StarlingLMAdapter)
 register_model_adapter(QwenAdapter)
 register_model_adapter(QwenMoeAdapter)
+register_model_adapter(Qwen3Adapter)
 register_model_adapter(QwenOmniAdapter)
 register_model_adapter(Llama3Adapter)
 register_model_adapter(Llama31Adapter)
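Note that `check_transformer_version` above compares version strings lexicographically, which can misorder releases such as 4.100.0 versus 4.51.0. A more defensive variant (a sketch, not part of this commit) parses the versions with `packaging`, which is normally available alongside transformers:

```python
# Defensive version check: parse the versions before comparing, so that
# e.g. "4.100.0" is correctly treated as newer than "4.51.0".
from packaging import version

def check_transformer_version(current_version: str) -> None:
    if version.parse(current_version) < version.parse("4.51.0"):
        raise ValueError(
            "Qwen3 requires transformers>=4.51.0, please upgrade your "
            "transformers package."
        )

check_transformer_version("4.51.3")   # OK
check_transformer_version("4.100.0")  # OK here; a plain string compare would reject it
```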
@@ -369,6 +369,13 @@ class DefaultModelWorker(ModelWorker):
     def _prepare_generate_stream(
         self, params: Dict, span_operation_name: str, is_stream=True
     ):
+        if self.llm_adapter.is_reasoning_model(
+            self._model_params, self.model_name.lower()
+        ):
+            params["is_reasoning_model"] = True
+        else:
+            params["is_reasoning_model"] = False
+
         params, model_context = self.llm_adapter.model_adaptation(
             params,
             self.model_name,
@@ -427,10 +434,6 @@ class DefaultModelWorker(ModelWorker):
             span_params["messages"] = list(
                 map(lambda m: m.dict(), span_params["messages"])
             )
-        if self.llm_adapter.is_reasoning_model(
-            self._model_params, self.model_name.lower()
-        ):
-            params["is_reasoning_model"] = True
 
         metadata = {
             "is_async_func": self.support_async(),