[autoparallel] find repeat blocks (#2854)

* [autoparallel] find repeat blocks

* polish

* polish

* polish
This commit is contained in:
YuliangLiu0306
2023-02-23 17:28:19 +08:00
committed by GitHub
parent 2e16f842a9
commit 0f392d7403
3 changed files with 229 additions and 3 deletions


@@ -1,13 +1,16 @@
import copy
import operator
import warnings
from functools import reduce
from typing import Dict, List, Optional, Union
import torch
from torch.fx.node import Node
from torch.utils._pytree import tree_map
from colossalai.device.device_mesh import DeviceMesh
from colossalai.tensor.shape_consistency import ShapeConsistencyManager
from colossalai.tensor.sharding_spec import ShardingSpec
from ..constants import INFINITY_COST
@@ -18,7 +21,7 @@ def generate_sharding_spec(input_: Union[Node, torch.Tensor], device_mesh: Devic
                           dim_partition_dict: Dict[int, List[int]]) -> ShardingSpec:
    """
    Generate the sharding spec of the tensor based on the given dim_partition_dict.

    Args:
        input_ (Union[Node, torch.Tensor]): the input can be a Node object or a PyTorch tensor. If a Node is given, the sharding spec is generated from the meta data attached to that node.
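
For orientation, here is a minimal usage sketch of generate_sharding_spec that is not part of this diff: the mesh shape, tensor size, and partitioning are illustrative assumptions, and the DeviceMesh constructor arguments follow common ColossalAI test usage rather than anything shown here. dim_partition_dict maps a tensor dimension to the device-mesh axes it is sharded across.

# Hedged usage sketch (not part of this commit); DeviceMesh arguments follow
# typical ColossalAI test usage and may differ between versions.
physical_mesh_id = torch.arange(4)
mesh_shape = (2, 2)
device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)

# Shard dim 0 of the tensor across mesh axis 0; dim 1 stays replicated.
tensor = torch.randn(16, 32)
sharding_spec = generate_sharding_spec(tensor, device_mesh, dim_partition_dict={0: [0]})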
@@ -59,7 +62,7 @@ def generate_resharding_costs(nodes: List[Node],
        nodes (List[Node]): a list of nodes
        sharding_spec_for_input (ShardingSpec): a list of ShardingSpec for the nodes.
        count_backward (Optional[bool]): whether to include the cost of resharding in the backward pass, default is True. False can be used for inference.
        dtype (Optional[torch.dtype]): the data type for cost calculation, default is None.
    '''
    # The resharding_cost of weight is counted due to sharing weight cases.
    resharding_costs = {}
@@ -88,3 +91,116 @@ def generate_resharding_costs(nodes: List[Node],
resharding_cost = INFINITY_COST
resharding_costs[input_node].append(resharding_cost)
return resharding_costs
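
To make the return value concrete, here is a hedged sketch of the mapping generate_resharding_costs builds, based only on the lines shown above: one cost list per input node, one entry per candidate sharding strategy, with INFINITY_COST marking conversions that cannot be performed. The keys and numbers below are placeholders.

# Illustrative shape of the result only; real keys are torch.fx Node objects.
resharding_costs = {
    'input_node_a': [0.0, 12.5, INFINITY_COST],    # one cost per candidate strategy
    'input_node_b': [3.2, 0.0, 7.8],
}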
def find_repeat_blocks(node_list: List[torch.fx.Node], root_module, common_length_threshold: int = 20):
    '''
    Find the largest repeat blocks in the graph whose length is larger than the threshold.

    Args:
        node_list (List[torch.fx.Node]): the list of graph nodes to analyze.
        root_module: the root module, used to resolve the submodule targeted by each call_module node.
        common_length_threshold (int): the minimum length for a block to count as a repeat block.
    '''
    # graph = gm.graph

    def _process_args(args):
        new_args = []
        for arg in args:
            if hasattr(arg, '_meta_data'):
                meta_data = arg._meta_data
            else:
                meta_data = arg

            def _process_arg(data):
                if isinstance(data, torch.Tensor):
                    data = data.size()
                elif isinstance(data, slice):
                    data = (data.start, data.step, data.stop)
                return data

            new_meta_data = tree_map(_process_arg, meta_data)
            new_args.append(new_meta_data)

        return new_args

    def _all_equal(check_list, check_fn):
        base_value = check_list[-1]
        for e in check_list:
            if not check_fn(e, base_value):
                return False
        return True

    def _check_node_list_equal(l1, l2):
        if len(l1) != len(l2):
            return False
        for node1, node2 in zip(l1, l2):
            if hash(node1.hash_key) != hash(node2.hash_key):
                return False
        return True

    def _check_node_equal(node1, node2):
        if hash(node1.hash_key) == hash(node2.hash_key):
            return True
        return False

    for index, node in enumerate(node_list):
        if node.op == 'call_module':
            target = node.target
            submod = root_module.get_submodule(target)
            submod_type = type(submod)
            target = submod_type
        else:
            target = node.target

        new_args = _process_args(node.args)

        if node.op != 'get_attr':
            hash_key = (node.op, target, *new_args)
        else:
            hash_key = (node.op,)

        setattr(node, 'hash_key', hash_key)

    hash_value_to_node_dict = {}

    for index, node in enumerate(node_list):
        hash_value = hash(node.hash_key)
        if hash_value not in hash_value_to_node_dict:
            hash_value_to_node_dict[hash_value] = []
        hash_value_to_node_dict[hash_value].append(index)

    # node_list = list(graph.nodes)
    node_list_start = 0
    max_common_length = common_length_threshold
    common_blocks_index = []
    for index, node in enumerate(node_list):
        # the comparison will be triggered if a common node appears
        if len(hash_value_to_node_dict[hash(node.hash_key)]) >= 2:
            start_index_list = hash_value_to_node_dict[hash(node.hash_key)]
            check_block_list = [node_list[start:start + max_common_length] for start in start_index_list]
            common_label = True
            if not _all_equal(check_block_list, _check_node_list_equal):
                common_label = False

            if common_label:
                common_blocks_index = copy.deepcopy(start_index_list)
                max_step = len(node_list) - common_blocks_index[-1] - max_common_length - 1
                for i in range(max_step):
                    # add assertion to avoid out of index
                    next_node_list = [node_list[index + max_common_length + i] for index in start_index_list]
                    if not _all_equal(next_node_list, _check_node_equal):
                        max_step = i
                        break
                max_common_length += max_step

        node_list_start += max_common_length

    # recover common subgraph from the index
    common_blocks = []
    for start in common_blocks_index:
        common_blocks.append(node_list[start:start + max_common_length])

    return common_blocks
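
Finally, a minimal sketch of how the new helper might be exercised. Nothing in this diff shows a caller, so the toy model, the manual `_meta_data` annotation (which the real pipeline's tracer would normally attach), the low threshold, and the import of find_repeat_blocks from the module changed here are all illustrative assumptions.

import torch
import torch.nn as nn
from torch.fx import symbolic_trace
# assume find_repeat_blocks is imported from the module modified in this commit

# Toy model with an obviously repeated structure (illustrative only).
class Block(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 8)

    def forward(self, x):
        return torch.relu(self.linear(x))

class Stack(nn.Module):
    def __init__(self):
        super().__init__()
        self.blocks = nn.ModuleList(Block() for _ in range(4))

    def forward(self, x):
        for block in self.blocks:
            x = block(x)
        return x

gm = symbolic_trace(Stack())
node_list = list(gm.graph.nodes)

# find_repeat_blocks hashes node args through their `_meta_data` when present;
# here we attach a fake meta tensor so repeated blocks hash identically.
for node in node_list:
    setattr(node, '_meta_data', torch.empty(2, 8, device='meta'))

# A small threshold is used only so the toy graph qualifies.
repeat_blocks = find_repeat_blocks(node_list, gm, common_length_threshold=2)
# Expect four repeat blocks, each a [call_module linear, call_function relu] pair.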