feat(model): Support GLM-4-0414 (#2630)

Fangyin Cheng 2025-04-15 22:48:18 +08:00 committed by GitHub
parent 7de6937865
commit 17b7e5ba0c
5 changed files with 6021 additions and 3801 deletions

View File

@@ -147,6 +147,9 @@ hf_qwen_omni = [
# uv pip install git+https://github.com/huggingface/transformers@v4.51.3-Qwen2.5-Omni-preview
# "transformers>=4.52.0",
]
hf_glm4 = [
"transformers>=4.51.3",
]
hf_kimi = [
"transformers<4.51.3",
]
@@ -165,6 +168,10 @@ conflicts = [
{ extra = "hf_qwen_omni" },
{ extra = "hf_kimi" },
],
[
{ extra = "hf_glm4" },
{ extra = "hf_kimi" },
],
]
[tool.hatch.build.targets.wheel]
packages = ["src/dbgpt"]
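The new hf_glm4 extra pins transformers>=4.51.3, while hf_kimi still requires transformers<4.51.3, so the two extras are declared as a conflicting pair and cannot be installed together. A minimal runtime check in the same spirit (illustrative only, not part of this commit; assumes the packaging library is installed):

from packaging.version import Version

import transformers

# The hf_glm4 extra above requires transformers>=4.51.3 for GLM-4-0414.
REQUIRED = Version("4.51.3")

if Version(transformers.__version__) < REQUIRED:
    raise RuntimeError(
        f"GLM-4-0414 needs transformers>={REQUIRED}, "
        f"found {transformers.__version__}"
    )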

View File

@@ -15,7 +15,6 @@ from dbgpt.model.adapter.model_metadata import (
COMMON_HF_QWEN25_MODELS,
)
from dbgpt.model.base import ModelType
from dbgpt.model.utils.media_utils import parse_messages
from dbgpt.util.i18n_utils import _
logger = logging.getLogger(__name__)
@@ -295,6 +294,8 @@ class NewHFChatModelAdapter(LLMModelAdapter, ABC):
messages: The messages to load.
tokenizer: The tokenizer to use.
"""
from dbgpt.model.utils.media_utils import parse_messages
results = parse_messages(messages)
if "images" in results and results["images"]:
params["images"] = results["images"]
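The parse_messages import moves from module scope into the method body, so the media utilities are only imported when a request actually carries media and the adapter module stays importable without them. A minimal sketch of the same deferred-import pattern (module and function names here are illustrative, not the repo's API):

def handle_media(params: dict, messages: list) -> dict:
    # Deferred import: optional media dependencies load only on this path.
    from mypkg.media_utils import parse_messages  # hypothetical module

    results = parse_messages(messages)
    if results.get("images"):
        params["images"] = results["images"]
    return params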
@@ -879,6 +880,38 @@ class GLM4Adapter(NewHFChatModelAdapter):
)
class GLM40414Adapter(NewHFChatModelAdapter):
"""
https://huggingface.co/collections/THUDM/glm-4-0414-67f3cbcb34dd9d252707cb2e
"""
support_4bit: bool = True
support_8bit: bool = True
def do_match(self, lower_model_name_or_path: Optional[str] = None):
return (
lower_model_name_or_path
and "glm-4" in lower_model_name_or_path
and "0414" in lower_model_name_or_path
and "base" not in lower_model_name_or_path
) or (lower_model_name_or_path and "glm-z1" in lower_model_name_or_path)
def use_fast_tokenizer(self) -> bool:
return True
def is_reasoning_model(
self,
deploy_model_params: LLMDeployModelParameters,
lower_model_name_or_path: Optional[str] = None,
) -> bool:
if (
deploy_model_params.reasoning_model is not None
and deploy_model_params.reasoning_model
):
return True
return lower_model_name_or_path and "z1" in lower_model_name_or_path
class Codegeex4Adapter(GLM4Adapter):
"""
https://huggingface.co/THUDM/codegeex4-all-9b
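The new GLM40414Adapter matches GLM-4 checkpoints tagged 0414 (excluding base models) plus any GLM-Z1 checkpoint, and treats Z1 variants as reasoning models unless the deploy parameters override that. A quick sanity check of the matcher (a sketch, assuming the adapter can be instantiated directly; names are lower-cased as the adapter expects):

adapter = GLM40414Adapter()

assert adapter.do_match("thudm/glm-4-32b-0414")            # 0414 chat model
assert adapter.do_match("thudm/glm-z1-9b-0414")            # Z1 reasoning model
assert not adapter.do_match("thudm/glm-4-32b-base-0414")   # base checkpoints excluded
assert not adapter.do_match("thudm/glm-4-9b-chat")         # older GLM-4, handled by GLM4Adapter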
@@ -974,6 +1007,7 @@ register_model_adapter(PhiAdapter)
register_model_adapter(SQLCoderAdapter)
register_model_adapter(OpenChatAdapter)
register_model_adapter(GLM4Adapter, supported_models=COMMON_HF_GLM_MODELS)
register_model_adapter(GLM40414Adapter)
register_model_adapter(Codegeex4Adapter)
register_model_adapter(Qwen2Adapter, supported_models=COMMON_HF_QWEN25_MODELS)
register_model_adapter(Internlm2Adapter)

View File

@@ -216,7 +216,7 @@ def _hf_try_load_default_quantization_model(
if model:
logger.info(f"Load default quantization model {model_name} success")
return _hf_handle_model_and_tokenizer(
model, tokenizer, device, num_gpus, model_params
model, tokenizer, device, num_gpus, model_params, to=False
)
return None, None
except Exception as e:
@@ -232,11 +232,13 @@ def _hf_handle_model_and_tokenizer(
device: str,
num_gpus: int,
model_params: HFLLMDeployModelParameters,
to: bool = True,
):
if (device == "cuda" and num_gpus == 1) or device == "mps" and tokenizer:
# TODO: Check cpu_offloading
try:
model.to(device)
if to:
model.to(device)
except ValueError:
pass
except AttributeError:
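The default-quantization path now passes to=False because pre-quantized checkpoints are already placed on their devices when loaded, and calling .to(device) on them can raise. A minimal sketch of the same guard (names illustrative, not this repo's API):

def place_on_device(model, device: str, move: bool = True):
    # Pre-quantized models manage their own placement; skip the explicit move.
    if not move:
        return model
    try:
        model.to(device)
    except (ValueError, AttributeError):
        # Some wrapped or quantized models refuse .to(); keep them as loaded.
        pass
    return model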

View File

@@ -29,12 +29,12 @@ explicit = true
[tool.uv.workspace]
members = [
"packages/dbgpt-app",
"packages/dbgpt-app",
"packages/dbgpt-client",
"packages/dbgpt-core",
"packages/dbgpt-ext",
"packages/dbgpt-serve",
"packages/dbgpt-accelerator/dbgpt-acc*"
"packages/dbgpt-core",
"packages/dbgpt-ext",
"packages/dbgpt-serve",
"packages/dbgpt-accelerator/dbgpt-acc*",
]
[tool.uv]

uv.lock (9763 changed lines)

File diff suppressed because one or more lines are too long