mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-09 04:50:17 +00:00
[autoparallel] refactor and add rotorc. (#1789)
* [autoparallel] refactor and add rotorc.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import math
|
||||
from abc import ABC
|
||||
from typing import List
|
||||
from typing import Any, Iterable, List
|
||||
|
||||
from torch.utils._pytree import tree_map
|
||||
|
||||
@@ -33,23 +33,25 @@ class Chain:
|
||||
self.xbar = xbar
|
||||
self.ftmp = ftmp
|
||||
self.btmp = btmp
|
||||
self.length = len(ftime)
|
||||
if check_consistency and not self.check_lengths():
|
||||
raise AttributeError("In Chain, input lists do not have consistent lengths")
|
||||
|
||||
def check_lengths(self):
    """Return True iff every per-stage list is consistent with the chain length.

    Forward-side lists (``ftime``, ``ftmp``) must have exactly ``len(self)``
    entries, while ``btime``, ``x``, ``xbar`` and ``btmp`` carry one extra
    entry (presumably for the final backward-only/loss stage — confirm
    against the caller).
    """
    return ((len(self.ftime) == len(self)) and (len(self.btime) == len(self) + 1) and (len(self.x) == len(self) + 1)
            and (len(self.ftmp) == len(self)) and (len(self.btmp) == len(self) + 1)
            and (len(self.xbar) == len(self) + 1))
|
||||
|
||||
def __repr__(self):
    """Render the chain as a list of per-stage tuples.

    Each tuple is ``(ftime, btime, x, xbar, ftmp, btmp)`` for one stage; the
    final entry covers the extra index beyond the forward stages, so its
    forward-only fields (``ftime``, ``ftmp``) are ``None``.
    """
    chain_list = []
    for i in range(len(self)):
        chain_list.append((self.ftime[i], self.btime[i], self.x[i], self.xbar[i], self.ftmp[i], self.btmp[i]))
    i = len(self)
    chain_list.append((None, self.btime[i], self.x[i], self.xbar[i], None, self.btmp[i]))
    return chain_list.__repr__()
|
||||
|
||||
def __len__(self):
    """Chain length is the number of forward stages (one per ``ftime`` entry)."""
    return len(self.ftime)
|
||||
|
||||
def discretize_all(self, unit: int):
|
||||
"""Discretize the chain into a list of chains according to unit size."""
|
||||
discretizer = lambda val: math.ceil(val / unit)
|
||||
@@ -163,79 +165,20 @@ class DiscardMemory(MemoryAccess):
|
||||
name = "DM"
|
||||
|
||||
|
||||
class Function:
    """Lightweight description of a function call: a name plus its arguments.

    ``str_args`` caches the comma-joined string form of the arguments so
    ``__repr__`` does not re-join on every call.
    """

    def __init__(self, name, *args):
        self.name = name
        self.args = args
        self.str_args = ','.join(str(v) for v in self.args)

    def __repr__(self):
        return "{n}({args})".format(n=self.name, args=self.str_args)
|
||||
|
||||
|
||||
class Sequence:
    """An ordered schedule of checkpointing operations.

    Holds a flat or nested list of ``Operation`` objects and sub-``Sequence``
    objects, tagged with the ``Function`` (name and parameters) that produced
    the schedule.
    """

    def __init__(self, function):
        self.sequence = []    # List of Operation and Sequence
        self.function = function    # Description of the function (name and parameters)

    def __repr__(self):
        return repr(self.list_operations())

    def list_operations(self):
        """Flatten nested sub-sequences into a single list of operations."""
        op_list = []
        for x in self.sequence:
            if isinstance(x, Operation):
                op_list.append(x)
            else:
                assert isinstance(x, Sequence)
                op_list += x.list_operations()
        return op_list

    def insert(self, operation):
        """Append a single operation to the schedule."""
        self.sequence.append(operation)

    def remove(self, operation_index):
        """Delete the entry at ``operation_index`` from the schedule."""
        del self.sequence[operation_index]

    def insert_sequence(self, sequence):
        """Append a nested sub-sequence to the schedule."""
        self.sequence.append(sequence)

    def shift(self, value):
        """Shift every contained operation by ``value`` (in place); returns self."""
        for x in self.sequence:
            x.shift(value)
        return self

    def remove_useless_write(self):
        """Drop a leading WriteMemory operation, which has no reader; returns self."""
        if self.sequence:
            if isinstance(self.sequence[0], WriteMemory):
                self.remove(0)
        return self

    def get_makespan(self, chain):
        """Total cost of all operations in the schedule, evaluated on ``chain``."""
        return sum(op.cost(chain) for op in self.list_operations())

    def without_suffix(self):
        """Strip the trailing ForwardEnable run before the Loss from the schedule.

        Returns ``(self, None)`` when there is nothing to strip; otherwise a
        new ``Sequence`` tagged ``"Strip"`` together with the index at which
        the stripped forward-enable chain started.
        """
        ops = self.list_operations()
        end_of_first_phase = [i for i in range(len(ops)) if type(ops[i]) is Loss][0]
        try:
            last_idx = max(i for i in range(end_of_first_phase) if not type(ops[i]) is ForwardEnable)
        except ValueError:
            # All operations before the Loss are ForwardEnable.
            last_idx = -1
        if last_idx == end_of_first_phase - 1:
            # No ForwardEnable suffix to strip.
            return (self, None)
        chain_length = ops[end_of_first_phase -
                           1].index    ## Some assumption here about the sequence (finishes with Forward_L
        start_of_fwd_enable_chain = ops[last_idx + 1].index    ## And starts with B_L), but should be fine in practice
        result = Sequence(Function("Strip", self.function.name, *self.function.args, start_of_fwd_enable_chain))
        for i in range(last_idx + 1):
            result.insert(ops[i])
        result.insert(Loss())
        # Sanity-check that the backward phase mirrors the stripped forwards.
        for i in range(chain_length, start_of_fwd_enable_chain - 1, -1):
            position = end_of_first_phase + 1 + (chain_length - i)
            assert type(ops[position]) is Backward
            assert ops[position].index == i
        for i in range(end_of_first_phase + 1 + 1 + chain_length - start_of_fwd_enable_chain, len(ops)):
            result.insert(ops[i])
        return (result, start_of_fwd_enable_chain)
|
||||
|
Reference in New Issue
Block a user