
[bugfix] colo attn bug fix

Author: haze188
Date:   2024-07-24 08:43:36 +00:00
Parent: 2d73efdfdd
Commit: befe3100da


@@ -59,7 +59,7 @@ def init_deepseek():
         num_attention_heads=8,
         num_key_value_heads=8,
         # vocab_size=2200,
-        first_k_dense_replace=2,
+        first_k_dense_replace=1,
         attn_implementation="flash_attention_2",
         torch_dtype="float16",
         n_routed_experts=8,
@@ -68,6 +68,7 @@ def init_deepseek():
     if hasattr(config, "pad_token_id"):
         config.pad_token_id = config.eos_token_id
+    print(config)
     model = transformers.AutoModel.from_config(config, trust_remote_code=True)
     return model
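For context, here is a minimal sketch of how the patched init_deepseek helper plausibly reads after this commit. Everything outside the two hunks above, in particular the AutoConfig.from_pretrained call and the checkpoint name, is an assumption for illustration, not taken from the diff.

    import transformers

    def init_deepseek():
        # Hypothetical reconstruction: the checkpoint name and the use of
        # AutoConfig.from_pretrained are assumptions; only the keyword
        # arguments shown in the diff hunks come from this commit.
        config = transformers.AutoConfig.from_pretrained(
            "deepseek-ai/deepseek-moe-16b-base",  # assumed checkpoint name
            trust_remote_code=True,
            num_attention_heads=8,
            num_key_value_heads=8,
            # vocab_size=2200,
            first_k_dense_replace=1,  # was 2 before this fix
            attn_implementation="flash_attention_2",
            torch_dtype="float16",
            n_routed_experts=8,
        )
        # Some remote-code configs lack pad_token_id; fall back to EOS.
        if hasattr(config, "pad_token_id"):
            config.pad_token_id = config.eos_token_id
        print(config)  # added by this commit for debugging the effective config
        model = transformers.AutoModel.from_config(config, trust_remote_code=True)
        return model

Lowering first_k_dense_replace from 2 to 1 means only the first transformer layer keeps a dense FFN before the MoE layers begin, which exercises the routed-expert path earlier in this small test model.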