Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-03 18:19:58 +00:00)
[fx]add uniform policy (#1208)
* [CLI] add CLI launcher
* Revert "[CLI] add CLI launcher"
  This reverts commit df7e6506d4.
* [fx]add uniform policy
@@ -32,6 +32,35 @@ def balanced_split_pass(gm: torch.fx.GraphModule, pp_size: int):
 return gm
+
+
+def uniform_split_pass(gm: torch.fx.GraphModule, pp_size: int):
+    mod_graph = gm.graph
+    # Collect the direct children of the traced module; each child counts as
+    # one layer for the uniform policy.
+    valid_children_size = 0
+    valid_children = []
+    for module in mod_graph.owning_module.children():
+        valid_children_size += 1
+        valid_children.append(module)
+
+    if valid_children_size < pp_size:
+        # If there are not enough valid children to shard, fall back to the
+        # balanced policy instead of the uniform policy.
+        return balanced_split_pass(gm, pp_size)
+
+    layers_per_partition = valid_children_size // pp_size
+    accumulate_layer_amount = 0
+    for node in mod_graph.nodes:
+        if pp_size <= 1:
+            break
+        if node.op == "call_module":
+            target_module = node.graph.owning_module.get_submodule(node.target)
+            if target_module in valid_children:
+                accumulate_layer_amount += 1
+                if accumulate_layer_amount == layers_per_partition:
+                    # The current partition is full: reset the counter and
+                    # mark a pipeline split after this node.
+                    accumulate_layer_amount = 0
+                    pp_size -= 1
+                    with mod_graph.inserting_after(node):
+                        split_node = mod_graph.create_node('call_function', pipe_split)
+    gm.recompile()
+    return gm


 def split_with_split_nodes_pass(annotated_gm: torch.fx.GraphModule):
     part_idx = 0
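To make the new policy concrete, here is a minimal usage sketch. It is an illustration only: the MLP model and the sizes are hypothetical, and it assumes uniform_split_pass and the pipe_split marker are imported from the module this diff modifies.

import torch
import torch.fx

# Hypothetical model: every nn.Linear is a direct child of the module, so each
# one counts as a "valid child" for the uniform policy.
class MLP(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(16, 16)
        self.fc2 = torch.nn.Linear(16, 16)
        self.fc3 = torch.nn.Linear(16, 16)
        self.fc4 = torch.nn.Linear(16, 16)

    def forward(self, x):
        return self.fc4(self.fc3(self.fc2(self.fc1(x))))

gm = torch.fx.symbolic_trace(MLP())
gm = uniform_split_pass(gm, pp_size=2)

# With 4 children and pp_size=2, layers_per_partition == 2, so exactly one
# pipe_split marker lands after fc2; a model with fewer children than pp_size
# would instead be handled by balanced_split_pass, per the guard in the pass.
splits = [n for n in gm.graph.nodes
          if n.op == 'call_function' and n.target is pipe_split]
print(len(splits))  # 1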