feat(model): Support MiniCPM4-8B (#2754)

Author: Fangyin Cheng
Date: 2025-06-09 16:23:10 +08:00
Committed by: GitHub
Parent: 4afa2b84b9
Commit: 326d6a4248
7 changed files with 41 additions and 4 deletions


@@ -162,6 +162,8 @@ The architecture of DB-GPT is shown in the following figure
We support a wide range of models, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and more.
- News
- 🔥🔥🔥 [DeepSeek-R1-0528](https://huggingface.co/deepseek-ai/DeepSeek-R1-0528)
- 🔥🔥🔥 [DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324)
- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)


@@ -169,6 +169,8 @@ At present, we have introduced several key features to showcase our current capabilities
We offer extensive model support, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and many more.
- News
- 🔥🔥🔥 [DeepSeek-R1-0528](https://huggingface.co/deepseek-ai/DeepSeek-R1-0528)
- 🔥🔥🔥 [DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324)
- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)


@@ -163,6 +163,8 @@
Extensive model support, covering dozens of large language models from open-source and API proxies, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and more. The following models are currently supported:
- Newly supported models
- 🔥🔥🔥 [DeepSeek-R1-0528](https://huggingface.co/deepseek-ai/DeepSeek-R1-0528)
- 🔥🔥🔥 [DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324)
- 🔥🔥🔥 [Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B)
- 🔥🔥🔥 [Qwen3-30B-A3B](https://huggingface.co/Qwen/Qwen3-30B-A3B)
- 🔥🔥🔥 [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B)


@@ -1106,6 +1106,25 @@ class KimiVLAdapter(NewHFChatModelAdapter):
        return lower_model_name_or_path and "thinking" in lower_model_name_or_path


+class MiniCPMAdapter(NewHFChatModelAdapter):
+    """
+    https://huggingface.co/openbmb/MiniCPM4-8B
+    """
+
+    support_4bit: bool = True
+    support_8bit: bool = True
+
+    def do_match(self, lower_model_name_or_path: Optional[str] = None):
+        return lower_model_name_or_path and "minicpm" in lower_model_name_or_path
+
+    def load(self, model_path: str, from_pretrained_kwargs: dict):
+        if not from_pretrained_kwargs:
+            from_pretrained_kwargs = {}
+        if "trust_remote_code" not in from_pretrained_kwargs:
+            from_pretrained_kwargs["trust_remote_code"] = True
+        return super().load(model_path, from_pretrained_kwargs)
+
+
# The following code is used to register the model adapter
# The last registered model adapter is matched first
register_model_adapter(CommonModelAdapter) # For all of hf models can be matched
@@ -1137,3 +1156,4 @@ register_model_adapter(Qwen2VLAdapter)
register_model_adapter(Internlm2Adapter)
register_model_adapter(DeepseekV3R1Adapter, supported_models=COMMON_HF_DEEPSEEK__MODELS)
register_model_adapter(KimiVLAdapter)
+register_model_adapter(MiniCPMAdapter)
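
For context, the new adapter claims any model whose name or path contains "minicpm" and injects `trust_remote_code=True` into the `from_pretrained` kwargs when it is missing. The following minimal sketch (plain Python, not DB-GPT API; the helper names are hypothetical) reproduces those two behaviors in isolation:

```python
# Minimal sketch, not DB-GPT API: the helper names are hypothetical and only
# mirror the logic added in MiniCPMAdapter.do_match and MiniCPMAdapter.load.
from typing import Optional


def matches_minicpm(lower_model_name_or_path: Optional[str]) -> bool:
    # do_match: any lower-cased name/path containing "minicpm" is a match.
    return bool(lower_model_name_or_path and "minicpm" in lower_model_name_or_path)


def with_trust_remote_code(from_pretrained_kwargs: Optional[dict]) -> dict:
    # load: default trust_remote_code to True if the caller did not set it.
    kwargs = dict(from_pretrained_kwargs or {})
    kwargs.setdefault("trust_remote_code", True)
    return kwargs


print(matches_minicpm("openbmb/minicpm4-8b"))           # True
print(with_trust_remote_code({"torch_dtype": "auto"}))  # trust_remote_code=True added
```

Since the comments above note that the last registered adapter is matched first, registering MiniCPMAdapter at the end gives it precedence over the catch-all CommonModelAdapter for any "minicpm" model.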


@@ -46,7 +46,18 @@ COMMON_HF_DEEPSEEK__MODELS = [
        function_calling=True,
    ),
    ModelMetadata(
-        model=["deepseek-ai/DeepSeek-V3"],
+        model=[
+            "deepseek-ai/DeepSeek-R1-0528",
+            "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
+        ],
+        context_length=128 * 1024,
+        max_output_length=64 * 1024,
+        description="DeepSeek-R1 by DeepSeek",
+        link="https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+        function_calling=True,
+    ),
+    ModelMetadata(
+        model=["deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-V3-0324"],
        context_length=128 * 1024,
        max_output_length=8 * 1024,
        description="DeepSeek-V3 by DeepSeek",


@@ -148,7 +148,7 @@ register_proxy_model_adapter(
model=["deepseek-ai/DeepSeek-V3", "Pro/deepseek-ai/DeepSeek-V3"],
context_length=64 * 1024,
max_output_length=8 * 1024,
description="DeepSeek-V3 by DeepSeek",
description="DeepSeek-V3 by DeepSeek(DeepSeek-V3-0324)",
link="https://siliconflow.cn/zh-cn/models",
function_calling=True,
),
@@ -156,7 +156,7 @@ register_proxy_model_adapter(
model=["deepseek-ai/DeepSeek-R1", "Pro/deepseek-ai/DeepSeek-R1"],
context_length=64 * 1024,
max_output_length=8 * 1024,
description="DeepSeek-R1 by DeepSeek",
description="DeepSeek-R1 by DeepSeek(DeepSeek-R1-0528)",
link="https://siliconflow.cn/zh-cn/models",
function_calling=True,
),


@@ -143,7 +143,7 @@ register_proxy_model_adapter(
        function_calling=True,
    ),
    ModelMetadata(
-        model="deepseek-r1",
+        model=["deepseek-r1", "deepseek-r1-0528"],
        context_length=64 * 1024,
        max_output_length=8 * 1024,
        description="DeepSeek-R1 by DeepSeek",