Update bert.py

flybird11111 2025-05-27 10:57:06 +08:00 committed by GitHub
parent 17654cb6cb
commit 611c1247ba


@@ -1039,7 +1039,6 @@ def get_jit_fused_bert_output_forward():
# Fix the tgt_len size in sequence parallel attention:
# same as the BertSdpaSelfAttention forward in transformers v4.51.3, except for the
# _, _, tgt_len, _ = query_layer.shape
def get_bert_sequence_parallel_attention_forward(shard_config: ShardConfig):
from transformers.models.bert.modeling_bert import BertSdpaSelfAttention
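The hunk above shows only the start of the new helper; the body of the patched forward is not part of this view. As a rough illustration of the tgt_len issue the comment refers to (an illustrative toy with made-up shapes, not the actual patch): under sequence parallelism the sequence dimension seen by query_layer can be a per-rank shard, so a reshape that hard-codes tgt_len taken from query_layer.shape can disagree with the surrounding hidden states. Deriving the length from the tensor being reshaped (or using -1) avoids that.

import torch
import torch.nn.functional as F

# Hypothetical sizes: full sequence length 8, sharded over 2 ranks -> local length 4.
batch, heads, head_dim = 2, 2, 16
full_seq_len, local_seq_len = 8, 4

# Toy tensors standing in for one rank's view inside sequence-parallel attention.
query_layer = torch.randn(batch, heads, local_seq_len, head_dim)
key_layer = torch.randn(batch, heads, full_seq_len, head_dim)
value_layer = torch.randn(batch, heads, full_seq_len, head_dim)

# The line the comment points at reads the length from the (possibly sharded) query:
_, _, tgt_len, _ = query_layer.shape  # 4 here, i.e. the local shard, not 8

attn_output = F.scaled_dot_product_attention(query_layer, key_layer, value_layer)

# Reshaping with -1 keeps the result consistent with however the sequence
# dimension was partitioned on this rank, instead of pinning it to tgt_len.
attn_output = attn_output.transpose(1, 2).reshape(batch, -1, heads * head_dim)
print(attn_output.shape)  # torch.Size([2, 4, 32])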