[Inference]Adapt to baichuan2 13B (#5614)

* adapt to baichuan2 13B

* adapt to baichuan2 13B

* change BAICHUAN_MODEL_NAME_OR_PATH

* fix test_decoding_attn.py

* Modifications based on review comments.

* change BAICHUAN_MODEL_NAME_OR_PATH

* mv attn mask processes to test flash decoding

* mv get_alibi_slopes baichuan modeling

* fix bugs in test_baichuan.py
This commit is contained in:
yuehuayingxueluo
2024-04-25 23:11:30 +08:00
committed by GitHub
parent f342a93871
commit 3c91e3f176
10 changed files with 786 additions and 134 deletions

View File

@@ -1,12 +1,15 @@
import torch.nn as nn
from torch.nn import Parameter
from colossalai.inference.modeling.models.nopadding_baichuan import NopadBaichuanAttention, NopadBaichuanMLP
from colossalai.inference.modeling.models.nopadding_baichuan import (
NopadBaichuanAttention,
NopadBaichuanMLP,
baichuan_rmsnorm_forward,
)
from colossalai.inference.modeling.models.nopadding_llama import (
llama_causal_lm_forward,
llama_decoder_layer_forward,
llama_model_forward,
llama_rmsnorm_forward,
)
from colossalai.inference.utils import init_to_get_rotary
from colossalai.shardformer.policies.base_policy import ModulePolicyDescription, SubModuleReplacementDescription
@@ -21,26 +24,30 @@ class NoPaddingBaichuanModelInferPolicy(LlamaForCausalLMPolicy):
policy = super().module_policy()
decoder_attribute_replacement = {
"lm_head.weight": Parameter(
nn.functional.normalize(self.model.lm_head.weight).transpose(0, 1), requires_grad=False
),
"lm_head.weight": Parameter(nn.functional.normalize(self.model.lm_head.weight), requires_grad=False),
}
policy["BaichuanForCausalLM"] = ModulePolicyDescription(
attribute_replacement=decoder_attribute_replacement,
)
policy["DecoderLayer"] = ModulePolicyDescription(
sub_module_replacement=[
SubModuleReplacementDescription(
suffix="mlp",
target_module=NopadBaichuanMLP,
),
SubModuleReplacementDescription(
suffix="self_attn",
target_module=NopadBaichuanAttention,
),
]
)
# used for replacing Baichuan 7B/13B decoder layer
for layer_name in ["DecoderLayer", "BaichuanLayer"]:
policy[layer_name] = ModulePolicyDescription(
sub_module_replacement=[
SubModuleReplacementDescription(
suffix="mlp",
target_module=NopadBaichuanMLP,
),
SubModuleReplacementDescription(
suffix="self_attn",
target_module=NopadBaichuanAttention,
),
]
)
self.append_or_create_method_replacement(
description={"forward": llama_decoder_layer_forward}, policy=policy, target_key=layer_name
)
self.append_or_create_method_replacement(
description={"forward": llama_causal_lm_forward}, policy=policy, target_key="BaichuanForCausalLM"
@@ -48,11 +55,9 @@ class NoPaddingBaichuanModelInferPolicy(LlamaForCausalLMPolicy):
self.append_or_create_method_replacement(
description={"forward": llama_model_forward}, policy=policy, target_key="BaichuanModel"
)
self.append_or_create_method_replacement(
description={"forward": llama_decoder_layer_forward}, policy=policy, target_key="DecoderLayer"
)
self.append_or_create_method_replacement(
description={"forward": llama_rmsnorm_forward}, policy=policy, target_key="RMSNorm"
description={"forward": baichuan_rmsnorm_forward}, policy=policy, target_key="RMSNorm"
)
return policy