Mirror of https://github.com/hpcaitech/ColossalAI.git
add paged-attention v2: support seq length split across thread blocks (#5707)
```diff
@@ -338,7 +338,8 @@ class NopadBaichuanAttention(ParallelModule):
             block_size,
             kv_seq_len,
             fd_inter_tensor.mid_output,
-            fd_inter_tensor.mid_output_lse,
+            fd_inter_tensor.exp_sums,
+            fd_inter_tensor.max_logits,
             self.alibi_slopes,
             sm_scale,
         )
```
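Context for the change: in the v2 scheme, the KV sequence for a single query token is split across multiple thread blocks. Each block attends over its own partition and writes a partial result into `fd_inter_tensor.mid_output`, along with that partition's softmax statistics, `exp_sums` and `max_logits` (which replace the single `mid_output_lse` log-sum-exp buffer at both call sites); a final reduction pass then merges the partials. Below is a minimal PyTorch sketch of such a merge step. The function name, tensor shapes, and the assumption that each partial output is already normalized within its partition are illustrative, not the kernel's actual layout.

```python
import torch

def combine_split_kv_partials(mid_output, exp_sums, max_logits):
    """Merge per-partition partial attention outputs into the final output.

    Hypothetical shapes, for illustration only:
      mid_output: [num_tokens, num_heads, num_partitions, head_dim]
          partial attention outputs, each normalized within its partition
      exp_sums:   [num_tokens, num_heads, num_partitions]
          per-partition softmax denominators, sum(exp(logit - partition_max))
      max_logits: [num_tokens, num_heads, num_partitions]
          per-partition maximum attention logits
    """
    # Rescaling against the global max keeps the merge numerically stable.
    global_max = max_logits.max(dim=-1, keepdim=True).values       # [T, H, 1]
    # Bring every partition's denominator onto the global scale.
    rescaled = exp_sums * torch.exp(max_logits - global_max)       # [T, H, P]
    # Each partition's share of the global softmax mass.
    weights = rescaled / rescaled.sum(dim=-1, keepdim=True)        # [T, H, P]
    # Weighted sum over the partition axis yields the final output.
    return (mid_output * weights.unsqueeze(-1)).sum(dim=2)         # [T, H, D]
```

This is the standard log-sum-exp trick applied across partitions: each block only needs its local max and denominator, so no block has to see the whole sequence before writing its partial result.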
```diff
@@ -596,7 +596,8 @@ class NopadLlamaAttention(LlamaAttention, ParallelModule):
             block_size,
             kv_seq_len,
             fd_inter_tensor.mid_output,
-            fd_inter_tensor.mid_output_lse,
+            fd_inter_tensor.exp_sums,
+            fd_inter_tensor.max_logits,
             None,
             sm_scale,
         )
```
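The Llama call site passes `None` where Baichuan passes `self.alibi_slopes`, since Llama uses rotary position embeddings rather than ALiBi biases. Both call sites rely on `fd_inter_tensor` carrying one slot per KV partition for every query token and head. A rough sketch of how such intermediates could be sized follows; the partition size of 512 and the helper name are assumptions for illustration, not ColossalAI's actual constants or API.

```python
import torch

# Assumed number of KV-cache tokens handled per thread block.
PARTITION_SIZE = 512

def alloc_fd_intermediates(num_tokens, num_heads, head_dim, max_seq_len,
                           dtype=torch.float32, device="cuda"):
    """Allocate hypothetical split-KV intermediates (illustrative shapes)."""
    # One partition per PARTITION_SIZE chunk of the longest sequence.
    num_partitions = (max_seq_len + PARTITION_SIZE - 1) // PARTITION_SIZE
    mid_output = torch.empty(num_tokens, num_heads, num_partitions, head_dim,
                             dtype=dtype, device=device)
    exp_sums = torch.empty(num_tokens, num_heads, num_partitions,
                           dtype=dtype, device=device)
    max_logits = torch.empty(num_tokens, num_heads, num_partitions,
                             dtype=dtype, device=device)
    return mid_output, exp_sums, max_logits
```

Sizing against `max_seq_len` up front lets the buffers be reused across decoding steps instead of being reallocated as sequences grow.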