From befe3100daef8886fc2540f05dc16501128bc961 Mon Sep 17 00:00:00 2001 From: haze188 Date: Wed, 24 Jul 2024 08:43:36 +0000 Subject: [PATCH] [bugfix] colo attn bug fix --- tests/kit/model_zoo/transformers/deepseek.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/kit/model_zoo/transformers/deepseek.py b/tests/kit/model_zoo/transformers/deepseek.py index f50996110..b8b446b57 100644 --- a/tests/kit/model_zoo/transformers/deepseek.py +++ b/tests/kit/model_zoo/transformers/deepseek.py @@ -59,7 +59,7 @@ def init_deepseek(): num_attention_heads=8, num_key_value_heads=8, # vocab_size=2200, - first_k_dense_replace=2, + first_k_dense_replace=1, attn_implementation="flash_attention_2", torch_dtype="float16", n_routed_experts=8, @@ -68,6 +68,7 @@ def init_deepseek(): if hasattr(config, "pad_token_id"): config.pad_token_id = config.eos_token_id + print(config) model = transformers.AutoModel.from_config(config, trust_remote_code=True) return model