[shardformer] update llama2/opt finetune example and fix llama2 policy (#4645)

* [shardformer] update shardformer readme

[shardformer] update shardformer readme

[shardformer] update shardformer readme

* [shardformer] update llama2/opt finetune example and shardformer update to llama2

* [shardformer] update llama2/opt finetune example and shardformer update to llama2

* [shardformer] update llama2/opt finetune example and shardformer update to llama2

* [shardformer] change dataset

* [shardformer] change dataset

* [shardformer] fix CI

* [shardformer] fix

* [shardformer] fix

* [shardformer] fix

* [shardformer] fix

* [shardformer] fix

[example] update opt example

[example] resolve comments

fix

fix
This commit is contained in:
flybird11111
2023-09-09 22:45:36 +08:00
committed by GitHub
parent a686f9ddc8
commit 7486ed7d3a
12 changed files with 165 additions and 167 deletions

View File

@@ -1,3 +1,4 @@
import warnings
from typing import Callable, List, Optional, Tuple
import torch
@@ -392,6 +393,13 @@ def get_llama_flash_attention_forward():
from transformers.models.llama.modeling_llama import LlamaAttention, apply_rotary_pos_emb
# Default to the version-2 code path, which calls `repeat_kv` on the key/value
# states before attention; downgrade only if that helper cannot be imported.
llama_version = 2
try:
    from transformers.models.llama.modeling_llama import repeat_kv
except ImportError:
    # Catch only the import failure: a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and mislabel unrelated errors as "llama v1".
    warnings.warn("using llamav1, llamav1 hasn't repeat_kv function")
    llama_version = 1
from colossalai.kernel.cuda_native import AttnMaskType, ColoAttention
def forward(
@@ -424,6 +432,11 @@ def get_llama_flash_attention_forward():
past_key_value = (key_states, value_states) if use_cache else None
# repeat k/v heads if n_kv_heads < n_heads
if llama_version == 2:
key_states = repeat_kv(key_states, self.num_key_value_groups)
value_states = repeat_kv(value_states, self.num_key_value_groups)
me_input_shape = (bsz, q_len, self.num_heads, self.head_dim)
query_states = query_states.transpose(1, 2).contiguous().view(*me_input_shape)
key_states = key_states.transpose(1, 2).contiguous().view(*me_input_shape)