mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-03 01:54:44 +00:00
feat(model): Support MiniCPM4-8B (#2754)
This commit is contained in:
@@ -162,6 +162,8 @@ DB-GPTのアーキテクチャは以下の図に示されています:
|
|||||||
私たちは、LLaMA/LLaMA2、Baichuan、ChatGLM、Wenxin、Tongyi、Zhipuなど、オープンソースおよびAPIエージェントからの数十の大規模言語モデル(LLM)を含む幅広いモデルをサポートしています。
|
私たちは、LLaMA/LLaMA2、Baichuan、ChatGLM、Wenxin、Tongyi、Zhipuなど、オープンソースおよびAPIエージェントからの数十の大規模言語モデル(LLM)を含む幅広いモデルをサポートしています。
|
||||||
|
|
||||||
- ニュース
|
- ニュース
|
||||||
|
- 🔥🔥🔥 [DeepSeek-R1-0528](https://huggingface.co/deepseek-ai/DeepSeek-R1-0528)
|
||||||
|
- 🔥🔥🔥 [DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324)
|
||||||
- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
|
- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
|
||||||
- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
|
- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
|
||||||
- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
|
- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
|
||||||
|
@@ -169,6 +169,8 @@ At present, we have introduced several key features to showcase our current capa
|
|||||||
We offer extensive model support, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and many more.
|
We offer extensive model support, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and many more.
|
||||||
|
|
||||||
- News
|
- News
|
||||||
|
- 🔥🔥🔥 [DeepSeek-R1-0528](https://huggingface.co/deepseek-ai/DeepSeek-R1-0528)
|
||||||
|
- 🔥🔥🔥 [DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324)
|
||||||
- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
|
- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
|
||||||
- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
|
- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
|
||||||
- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
|
- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
|
||||||
|
@@ -163,6 +163,8 @@
|
|||||||
海量模型支持,包括开源、API代理等几十种大语言模型。如LLaMA/LLaMA2、Baichuan、ChatGLM、文心、通义、智谱等。当前已支持如下模型:
|
海量模型支持,包括开源、API代理等几十种大语言模型。如LLaMA/LLaMA2、Baichuan、ChatGLM、文心、通义、智谱等。当前已支持如下模型:
|
||||||
|
|
||||||
- 新增支持模型
|
- 新增支持模型
|
||||||
|
- 🔥🔥🔥 [DeepSeek-R1-0528](https://huggingface.co/deepseek-ai/DeepSeek-R1-0528)
|
||||||
|
- 🔥🔥🔥 [DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324)
|
||||||
- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
|
- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
|
||||||
- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
|
- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
|
||||||
- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
|
- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)
|
||||||
|
@@ -1106,6 +1106,25 @@ class KimiVLAdapter(NewHFChatModelAdapter):
|
|||||||
return lower_model_name_or_path and "thinking" in lower_model_name_or_path
|
return lower_model_name_or_path and "thinking" in lower_model_name_or_path
|
||||||
|
|
||||||
|
|
||||||
|
class MiniCPMAdapter(NewHFChatModelAdapter):
    """Adapter for OpenBMB MiniCPM chat models.

    Reference model card: https://huggingface.co/openbmb/MiniCPM4-8B
    """

    # Quantized loading is permitted for this family.
    support_4bit: bool = True
    support_8bit: bool = True

    def do_match(self, lower_model_name_or_path: Optional[str] = None):
        """Match any (lowercased) model name/path containing 'minicpm'.

        Returns a truthy/falsy value rather than a strict bool, mirroring
        the sibling adapters in this module.
        """
        return lower_model_name_or_path and "minicpm" in lower_model_name_or_path

    def load(self, model_path: str, from_pretrained_kwargs: dict):
        """Load the model, ensuring ``trust_remote_code`` is set.

        NOTE(review): presumably the MiniCPM repos ship custom modeling
        code, which is why remote code must be trusted — confirm against
        the model card.
        """
        kwargs = from_pretrained_kwargs or {}
        # Only supply a default; never override an explicit caller choice.
        kwargs.setdefault("trust_remote_code", True)
        return super().load(model_path, kwargs)
|
||||||
|
|
||||||
|
|
||||||
# The following code is used to register the model adapter
|
# The following code is used to register the model adapter
|
||||||
# The last registered model adapter is matched first
|
# The last registered model adapter is matched first
|
||||||
register_model_adapter(CommonModelAdapter) # For all of hf models can be matched
|
register_model_adapter(CommonModelAdapter) # For all of hf models can be matched
|
||||||
@@ -1137,3 +1156,4 @@ register_model_adapter(Qwen2VLAdapter)
|
|||||||
register_model_adapter(Internlm2Adapter)
|
register_model_adapter(Internlm2Adapter)
|
||||||
register_model_adapter(DeepseekV3R1Adapter, supported_models=COMMON_HF_DEEPSEEK__MODELS)
|
register_model_adapter(DeepseekV3R1Adapter, supported_models=COMMON_HF_DEEPSEEK__MODELS)
|
||||||
register_model_adapter(KimiVLAdapter)
|
register_model_adapter(KimiVLAdapter)
|
||||||
|
register_model_adapter(MiniCPMAdapter)
|
||||||
|
@@ -46,7 +46,18 @@ COMMON_HF_DEEPSEEK__MODELS = [
|
|||||||
function_calling=True,
|
function_calling=True,
|
||||||
),
|
),
|
||||||
ModelMetadata(
|
ModelMetadata(
|
||||||
model=["deepseek-ai/DeepSeek-V3"],
|
model=[
|
||||||
|
"deepseek-ai/DeepSeek-R1-0528",
|
||||||
|
"deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
||||||
|
],
|
||||||
|
context_length=128 * 1024,
|
||||||
|
max_output_length=64 * 1024,
|
||||||
|
description="DeepSeek-R1 by DeepSeek",
|
||||||
|
link="https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
|
||||||
|
function_calling=True,
|
||||||
|
),
|
||||||
|
ModelMetadata(
|
||||||
|
model=["deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-V3-0324"],
|
||||||
context_length=128 * 1024,
|
context_length=128 * 1024,
|
||||||
max_output_length=8 * 1024,
|
max_output_length=8 * 1024,
|
||||||
description="DeepSeek-V3 by DeepSeek",
|
description="DeepSeek-V3 by DeepSeek",
|
||||||
|
@@ -148,7 +148,7 @@ register_proxy_model_adapter(
|
|||||||
model=["deepseek-ai/DeepSeek-V3", "Pro/deepseek-ai/DeepSeek-V3"],
|
model=["deepseek-ai/DeepSeek-V3", "Pro/deepseek-ai/DeepSeek-V3"],
|
||||||
context_length=64 * 1024,
|
context_length=64 * 1024,
|
||||||
max_output_length=8 * 1024,
|
max_output_length=8 * 1024,
|
||||||
description="DeepSeek-V3 by DeepSeek",
|
description="DeepSeek-V3 by DeepSeek(DeepSeek-V3-0324)",
|
||||||
link="https://siliconflow.cn/zh-cn/models",
|
link="https://siliconflow.cn/zh-cn/models",
|
||||||
function_calling=True,
|
function_calling=True,
|
||||||
),
|
),
|
||||||
@@ -156,7 +156,7 @@ register_proxy_model_adapter(
|
|||||||
model=["deepseek-ai/DeepSeek-R1", "Pro/deepseek-ai/DeepSeek-R1"],
|
model=["deepseek-ai/DeepSeek-R1", "Pro/deepseek-ai/DeepSeek-R1"],
|
||||||
context_length=64 * 1024,
|
context_length=64 * 1024,
|
||||||
max_output_length=8 * 1024,
|
max_output_length=8 * 1024,
|
||||||
description="DeepSeek-R1 by DeepSeek",
|
description="DeepSeek-R1 by DeepSeek(DeepSeek-R1-0528)",
|
||||||
link="https://siliconflow.cn/zh-cn/models",
|
link="https://siliconflow.cn/zh-cn/models",
|
||||||
function_calling=True,
|
function_calling=True,
|
||||||
),
|
),
|
||||||
|
@@ -143,7 +143,7 @@ register_proxy_model_adapter(
|
|||||||
function_calling=True,
|
function_calling=True,
|
||||||
),
|
),
|
||||||
ModelMetadata(
|
ModelMetadata(
|
||||||
model="deepseek-r1",
|
model=["deepseek-r1", "deepseek-r1-0528"],
|
||||||
context_length=64 * 1024,
|
context_length=64 * 1024,
|
||||||
max_output_length=8 * 1024,
|
max_output_length=8 * 1024,
|
||||||
description="DeepSeek-R1 by DeepSeek",
|
description="DeepSeek-R1 by DeepSeek",
|
||||||
|
Reference in New Issue
Block a user