fix the ring attn

Author: wangbluo
Date: 2024-09-25 18:34:29 +08:00
Parent: 10e4f7da72
Commit: cfd9eda628
2 changed files with 25 additions and 14 deletions


@@ -563,12 +563,14 @@ def get_llama_flash_attention_forward(shard_config: ShardConfig, sp_mode=None, s
         key_states = repeat_kv(key_states, self.num_key_value_groups)
         value_states = repeat_kv(value_states, self.num_key_value_groups)
 
+        tp_group = shard_config.tensor_parallel_process_group
         if sp_mode == "ring_attn":
             attn_output = RingAttention.attention(
                 query_states,
                 key_states,
                 value_states,
                 sp_group,
+                tp_group,
                 **attention_mask,
                 inner_ring_size=shard_config.inner_ring_size,
             )
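
For context, the hunk above reads the tensor-parallel process group off the ShardConfig and forwards it to RingAttention.attention alongside the sequence-parallel group. Below is a minimal sketch of the resulting call pattern, not the library's exact code: it assumes the import path colossalai.shardformer.layer.attn for RingAttention, that attention_mask is a dict of flash-attention keyword arguments (hence the ** unpacking), and that the helper name ring_attn_forward is purely illustrative.

# Minimal sketch of how the new tp_group argument is threaded from ShardConfig
# into RingAttention.attention (assumptions noted above).
from colossalai.shardformer.layer.attn import RingAttention  # assumed import path

def ring_attn_forward(query_states, key_states, value_states,
                      attention_mask, sp_group, shard_config):
    # attention_mask is assumed to be a dict of flash-attention kwargs
    # (mask type, cumulative sequence lengths, etc.), unpacked below.
    tp_group = shard_config.tensor_parallel_process_group  # what this commit adds
    return RingAttention.attention(
        query_states,
        key_states,
        value_states,
        sp_group,
        tp_group,  # tensor-parallel group now passed through to ring attention
        **attention_mask,
        inner_ring_size=shard_config.inner_ring_size,
    )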