[shardformer] fix pipeline forward error if custom layer distribution is used (#5189)

* Use self.[distribute_layers|get_stage_index] to exploit custom layer distribution
* Change static methods for t5 layer distribution to member functions
* Change static methods for whisper layer distribution to member functions
* Replace whisper policy usage with self one
* Fix test case to use non-static layer distribution methods
* fix: fix typo

---------

Co-authored-by: Wenhao Chen <cwher@outlook.com>
2025-09-26 04:03:58 +00:00 · 2024-03-27 01:57:00 -04:00
parent e6707a6e8d
commit 00525f7772
18 changed files with 136 additions and 106 deletions
--- a/colossalai/shardformer/policies/base_policy.py
+++ b/colossalai/shardformer/policies/base_policy.py
@@ -197,8 +197,7 @@ class Policy(ABC):
        """
        return []

-    @staticmethod
-    def distribute_layers(num_layers: int, num_stages: int) -> List[int]:
+    def distribute_layers(self, num_layers: int, num_stages: int) -> List[int]:
        """Divide layers into stages"""
        quotient = num_layers // num_stages
        remainder = num_layers % num_stages
@@ -213,8 +212,8 @@ class Policy(ABC):
                layers_per_stage[i] += 1
        return layers_per_stage

-    @staticmethod
    def get_stage_index(
+        self,
        layers_per_stage: List[int],
        stage: int,
        num_model_chunks: int = 1,
@@ -242,4 +241,4 @@ class Policy(ABC):
            end_idx = num_layers_per_stage_accumulated[stage + model_chunk * num_stages + 1]
            stage_indices.append([start_idx, end_idx])

-        return stage_indices[0] if num_model_chunks == 1 else stage_indices
+        return stage_indices[0] if num_model_chunks == 1 else stage_indices