feat(model): Support MiniCPM4-8B (#2754)

This commit is contained in:
Fangyin Cheng
2025-06-09 16:23:10 +08:00
committed by GitHub
parent 4afa2b84b9
commit 326d6a4248
7 changed files with 41 additions and 4 deletions

View File

@@ -1106,6 +1106,25 @@ class KimiVLAdapter(NewHFChatModelAdapter):
return lower_model_name_or_path and "thinking" in lower_model_name_or_path
class MiniCPMAdapter(NewHFChatModelAdapter):
    """Adapter for the openbmb MiniCPM family of chat models.

    https://huggingface.co/openbmb/MiniCPM4-8B
    """

    support_4bit: bool = True
    support_8bit: bool = True

    def do_match(self, lower_model_name_or_path: Optional[str] = None):
        # Match any model name/path that contains "minicpm"
        # (the caller is expected to pass a lower-cased string).
        return lower_model_name_or_path and "minicpm" in lower_model_name_or_path

    def load(self, model_path: str, from_pretrained_kwargs: dict):
        # MiniCPM checkpoints ship custom modeling code on the Hub, so
        # default trust_remote_code to True unless the caller already
        # made an explicit choice.
        from_pretrained_kwargs = from_pretrained_kwargs or {}
        from_pretrained_kwargs.setdefault("trust_remote_code", True)
        return super().load(model_path, from_pretrained_kwargs)
# The following code registers the model adapters.
# Matching runs in reverse registration order: the last registered
# adapter is tried first, so the generic fallback is registered first.
register_model_adapter(CommonModelAdapter)  # Fallback: matches any HF model
@@ -1137,3 +1156,4 @@ register_model_adapter(Qwen2VLAdapter)
register_model_adapter(Internlm2Adapter)
# DeepSeek adapter is registered with an explicit supported-model list
# instead of relying on name-based matching alone.
register_model_adapter(DeepseekV3R1Adapter, supported_models=COMMON_HF_DEEPSEEK__MODELS)
register_model_adapter(KimiVLAdapter)
register_model_adapter(MiniCPMAdapter)

View File

@@ -46,7 +46,18 @@ COMMON_HF_DEEPSEEK__MODELS = [
function_calling=True,
),
ModelMetadata(
model=["deepseek-ai/DeepSeek-V3"],
model=[
"deepseek-ai/DeepSeek-R1-0528",
"deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
],
context_length=128 * 1024,
max_output_length=64 * 1024,
description="DeepSeek-R1 by DeepSeek",
link="https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
function_calling=True,
),
ModelMetadata(
model=["deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-V3-0324"],
context_length=128 * 1024,
max_output_length=8 * 1024,
description="DeepSeek-V3 by DeepSeek",

View File

@@ -148,7 +148,7 @@ register_proxy_model_adapter(
model=["deepseek-ai/DeepSeek-V3", "Pro/deepseek-ai/DeepSeek-V3"],
context_length=64 * 1024,
max_output_length=8 * 1024,
description="DeepSeek-V3 by DeepSeek",
description="DeepSeek-V3 by DeepSeek(DeepSeek-V3-0324)",
link="https://siliconflow.cn/zh-cn/models",
function_calling=True,
),
@@ -156,7 +156,7 @@ register_proxy_model_adapter(
model=["deepseek-ai/DeepSeek-R1", "Pro/deepseek-ai/DeepSeek-R1"],
context_length=64 * 1024,
max_output_length=8 * 1024,
description="DeepSeek-R1 by DeepSeek",
description="DeepSeek-R1 by DeepSeek(DeepSeek-R1-0528)",
link="https://siliconflow.cn/zh-cn/models",
function_calling=True,
),

View File

@@ -143,7 +143,7 @@ register_proxy_model_adapter(
function_calling=True,
),
ModelMetadata(
model="deepseek-r1",
model=["deepseek-r1", "deepseek-r1-0528"],
context_length=64 * 1024,
max_output_length=8 * 1024,
description="DeepSeek-R1 by DeepSeek",