Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-08-14 14:13:22 +00:00
[misc] Bypass the huggingface bug to solve the mask mismatch problem (#5991)
This commit is contained in:
parent 4dd03999ec
commit 887d2d579b
@@ -666,6 +666,9 @@ def get_deepseek_flash_attention_model_forward(shard_config, sp_mode=None, sp_si
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
 
+        # TODO: upgrade transformers to 4.44.0 to fix the bug, remove the hard code.
+        self._use_flash_attention_2 = shard_config.enable_flash_attention
+        self._use_sdpa = False if shard_config.enable_flash_attention else self._use_sdpa
         if self._use_flash_attention_2:
             # 2d mask is passed through the layers
             attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
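The added lines hard-code the attention backend from shard_config: flash attention is forced on (or off) and the SDPA path is disabled whenever flash attention is enabled, so the model keeps the 2-D padding mask instead of an expanded mask whose shape the buggy transformers version mismatches. Below is a minimal sketch of that mask-selection behaviour, not the actual ColossalAI or transformers code; the helper name select_attention_mask is hypothetical, and only the flash-attention branch mirrors the patched lines above.

# Minimal sketch (hypothetical helper, assumed behaviour) of the mask selection
# the patch forces when flash attention is enabled.
from typing import Optional
import torch

def select_attention_mask(attention_mask: Optional[torch.Tensor],
                          enable_flash_attention: bool) -> Optional[torch.Tensor]:
    if enable_flash_attention:
        # With self._use_flash_attention_2 hard-set to True, the 2-D
        # (batch, seq_len) padding mask is passed through the layers,
        # and dropped entirely when no position is actually padded.
        return attention_mask if (attention_mask is not None and 0 in attention_mask) else None
    # Other backends (eager / SDPA) expand the mask to 4-D elsewhere,
    # which is where the shape mismatch this commit works around arises.
    return attention_mask

# Usage: the second sequence has one padded position, so the 2-D mask is kept.
mask = torch.tensor([[1, 1, 1, 1], [1, 1, 1, 0]])
print(select_attention_mask(mask, enable_flash_attention=True))              # 2-D mask kept
print(select_attention_mask(torch.ones(2, 4), enable_flash_attention=True))  # None (no padding)

As the in-diff TODO notes, this is a temporary workaround: once transformers is upgraded to 4.44.0, the hard-coded backend flags can be removed.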