mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-03 10:06:44 +00:00
[shardformer] update llama2/opt finetune example and fix llama2 policy (#4645)
* [shardformer] update shardformer readme * [shardformer] update llama2/opt finetune example and update shardformer to llama2 * [shardformer] change dataset * [shardformer] fix CI * [shardformer] misc fixes * [example] update opt example and resolve review comments
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import warnings
|
||||
from typing import Callable, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
@@ -392,6 +393,13 @@ def get_llama_flash_attention_forward():
|
||||
|
||||
from transformers.models.llama.modeling_llama import LlamaAttention, apply_rotary_pos_emb
|
||||
|
||||
llama_version = 2
|
||||
try:
|
||||
from transformers.models.llama.modeling_llama import repeat_kv
|
||||
except:
|
||||
warnings.warn("using llamav1, llamav1 hasn't repeat_kv function")
|
||||
llama_version = 1
|
||||
|
||||
from colossalai.kernel.cuda_native import AttnMaskType, ColoAttention
|
||||
|
||||
def forward(
|
||||
@@ -424,6 +432,11 @@ def get_llama_flash_attention_forward():
|
||||
|
||||
past_key_value = (key_states, value_states) if use_cache else None
|
||||
|
||||
# repeat k/v heads if n_kv_heads < n_heads
|
||||
if llama_version == 2:
|
||||
key_states = repeat_kv(key_states, self.num_key_value_groups)
|
||||
value_states = repeat_kv(value_states, self.num_key_value_groups)
|
||||
|
||||
me_input_shape = (bsz, q_len, self.num_heads, self.head_dim)
|
||||
query_states = query_states.transpose(1, 2).contiguous().view(*me_input_shape)
|
||||
key_states = key_states.transpose(1, 2).contiguous().view(*me_input_shape)
|
||||
|
Reference in New Issue
Block a user