[Hotfix] Fix llama fwd replacement bug (#6031)

Co-authored-by: Edenzzzz <wtan45@wisc.edu>
Wenxuan Tan
2024-08-23 15:44:27 +08:00
committed by GitHub
parent 39e2597426
commit 7cf9df07bc

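The hunk below dedents the "if self.pipeline_stage_manager is None:" block by one
indentation level, so the flash-attention forward for LlamaModel is registered
whenever no pipeline stage manager is in use, rather than only when the preceding
attention-replacement branch is taken. The toy sketch that follows illustrates why
the nesting matters; it is a minimal stand-in, not the real LlamaPolicy code, and
the enclosing flash-attention guard is an assumption, since it lies outside the hunk.

# Toy illustration (not the real ColossalAI LlamaPolicy): how the nesting of the
# pipeline_stage_manager check decides whether LlamaModel.forward gets replaced.
class FakePolicy:
    def __init__(self, enable_flash_attention, pipeline_stage_manager=None):
        self.enable_flash_attention = enable_flash_attention
        self.pipeline_stage_manager = pipeline_stage_manager
        self.replaced = []  # records which targets had their forward replaced

    def append_or_create_method_replacement(self, target_key):
        self.replaced.append(target_key)

    def module_policy_before_fix(self):
        # Old layout: the LlamaModel replacement sits inside the attention
        # branch, so skipping that branch also skips the model replacement.
        if self.enable_flash_attention:  # assumed enclosing guard
            self.append_or_create_method_replacement("attn_cls")
            if self.pipeline_stage_manager is None:
                self.append_or_create_method_replacement("LlamaModel")

    def module_policy_after_fix(self):
        # New layout: the LlamaModel replacement depends only on the pipeline
        # check, matching the dedent in the hunk.
        if self.enable_flash_attention:  # assumed enclosing guard
            self.append_or_create_method_replacement("attn_cls")
        if self.pipeline_stage_manager is None:
            self.append_or_create_method_replacement("LlamaModel")

buggy = FakePolicy(enable_flash_attention=False)
buggy.module_policy_before_fix()
print(buggy.replaced)  # [] -- LlamaModel.forward never replaced

fixed = FakePolicy(enable_flash_attention=False)
fixed.module_policy_after_fix()
print(fixed.replaced)  # ['LlamaModel'] -- replaced even without flash attention
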
@@ -95,19 +95,20 @@ class LlamaPolicy(Policy):
                 policy=policy,
                 target_key=attn_cls,
             )
-            if self.pipeline_stage_manager is None:
-                self.append_or_create_method_replacement(
-                    description={
-                        "forward": get_llama_flash_attention_model_forward(
-                            self.shard_config,
-                            sp_mode=sp_mode,
-                            sp_size=sp_size,
-                            sp_group=sp_group,
-                        ),
-                    },
-                    policy=policy,
-                    target_key=LlamaModel,
-                )
+
+        if self.pipeline_stage_manager is None:
+            self.append_or_create_method_replacement(
+                description={
+                    "forward": get_llama_flash_attention_model_forward(
+                        self.shard_config,
+                        sp_mode=sp_mode,
+                        sp_size=sp_size,
+                        sp_group=sp_group,
+                    ),
+                },
+                policy=policy,
+                target_key=LlamaModel,
+            )
 
         if self.shard_config.enable_tensor_parallelism:
             assert (