[Hotfix] Fix llama fwd replacement bug (#6031)

Co-authored-by: Edenzzzz <wtan45@wisc.edu>
Wenxuan Tan
2024-08-23 15:44:27 +08:00
committed by GitHub
parent 39e2597426
commit 7cf9df07bc

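The hunk below dedents the "if self.pipeline_stage_manager is None:" block by one
indentation level, so the flash-attention forward for LlamaModel is registered
whenever no pipeline stage manager is in use, rather than only when the preceding
attention-replacement branch is taken. The toy sketch that follows illustrates why
the nesting matters; it is a minimal stand-in, not the real LlamaPolicy code, and
the enclosing flash-attention guard is an assumption, since it lies outside the hunk.

# Toy illustration (not the real ColossalAI LlamaPolicy): how the nesting of the
# pipeline_stage_manager check decides whether LlamaModel.forward gets replaced.
class FakePolicy:
    def __init__(self, enable_flash_attention, pipeline_stage_manager=None):
        self.enable_flash_attention = enable_flash_attention
        self.pipeline_stage_manager = pipeline_stage_manager
        self.replaced = []  # records which targets had their forward replaced

    def append_or_create_method_replacement(self, target_key):
        self.replaced.append(target_key)

    def module_policy_before_fix(self):
        # Old layout: the LlamaModel replacement sits inside the attention
        # branch, so skipping that branch also skips the model replacement.
        if self.enable_flash_attention:  # assumed enclosing guard
            self.append_or_create_method_replacement("attn_cls")
            if self.pipeline_stage_manager is None:
                self.append_or_create_method_replacement("LlamaModel")

    def module_policy_after_fix(self):
        # New layout: the LlamaModel replacement depends only on the pipeline
        # check, matching the dedent in the hunk.
        if self.enable_flash_attention:  # assumed enclosing guard
            self.append_or_create_method_replacement("attn_cls")
        if self.pipeline_stage_manager is None:
            self.append_or_create_method_replacement("LlamaModel")

buggy = FakePolicy(enable_flash_attention=False)
buggy.module_policy_before_fix()
print(buggy.replaced)  # [] -- LlamaModel.forward never replaced

fixed = FakePolicy(enable_flash_attention=False)
fixed.module_policy_after_fix()
print(fixed.replaced)  # ['LlamaModel'] -- replaced even without flash attention
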
@@ -95,19 +95,20 @@ class LlamaPolicy(Policy):
                 policy=policy,
                 target_key=attn_cls,
             )
-            if self.pipeline_stage_manager is None:
-                self.append_or_create_method_replacement(
-                    description={
-                        "forward": get_llama_flash_attention_model_forward(
-                            self.shard_config,
-                            sp_mode=sp_mode,
-                            sp_size=sp_size,
-                            sp_group=sp_group,
-                        ),
-                    },
-                    policy=policy,
-                    target_key=LlamaModel,
-                )
+
+        if self.pipeline_stage_manager is None:
+            self.append_or_create_method_replacement(
+                description={
+                    "forward": get_llama_flash_attention_model_forward(
+                        self.shard_config,
+                        sp_mode=sp_mode,
+                        sp_size=sp_size,
+                        sp_group=sp_group,
+                    ),
+                },
+                policy=policy,
+                target_key=LlamaModel,
+            )
 
         if self.shard_config.enable_tensor_parallelism:
             assert (