Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2026-01-05 23:54:53 +00:00

[autoparallel] remove deprecated codes (#2664)
@@ -1,96 +0,0 @@
from copy import deepcopy
from pickletools import optimize

import pytest
import torch
import torch.nn as nn
from torch.fx import GraphModule

from colossalai.auto_parallel.tensor_shard.deprecated.cost_graph import CostGraph
from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx.tracer.tracer import ColoTracer


class ConvModel(nn.Module):

    def __init__(self, c_in, c_out):
        super().__init__()
        self.conv1 = nn.Conv2d(c_in, c_out, kernel_size=3)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x * 2
        x = self.conv1(x)
        x = x / 2
        x = self.relu(x)
        return x


def test_cost_graph():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)
    entire_shape = torch.Size((4, 16, 64, 64))

    tracer = ColoTracer()
    model = ConvModel(16, 32)
    input_sample = {'x': torch.rand(4, 16, 64, 64).to('meta')}

    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %mul : [#users=1] = call_function[target=operator.mul](args = (%x, 2), kwargs = {})
    #     %conv1 : [#users=1] = call_module[target=conv1](args = (%mul,), kwargs = {})
    #     %truediv : [#users=1] = call_function[target=operator.truediv](args = (%conv1, 2), kwargs = {})
    #     %relu : [#users=1] = call_module[target=relu](args = (%truediv,), kwargs = {})
    #     return relu
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()

    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)
    strategies_constructor.build_strategies_and_cost()

# (x, mul):{(0, 0): 0}
# (mul, conv1):{(0, 0): 65547.1, (0, 1): 65547.1, (0, 2): 65547.1, (0, 3): 65547.1, (0, 4): 131105.30000000002, (0, 5): 131105.30000000002, (0, 6): 65547.1, (0, 7): 65547.1, (0, 8): 65547.1, (0, 9): 65547.1, (0, 10): 0, (0, 11): 0, (0, 12): 0, (0, 13): 131105.30000000002, (0, 14): 131105.30000000002}
# (conv1, truediv):{(0, 0): 0, (1, 0): inf, (2, 0): inf, (3, 0): inf, (4, 0): 0, (5, 0): inf, (6, 0): inf, (7, 0): inf, (8, 0): inf, (9, 0): inf, (10, 0): inf, (11, 0): inf, (12, 0): inf, (13, 0): inf, (14, 0): inf, (0, 1): inf, (1, 1): 0, (2, 1): inf, (3, 1): inf, (4, 1): inf, (5, 1): 0, (6, 1): inf, (7, 1): inf, (8, 1): inf, (9, 1): inf, (10, 1): inf, (11, 1): inf, (12, 1): inf, (13, 1): inf, (14, 1): inf, (0, 2): inf, (1, 2): inf, (2, 2): 0, (3, 2): inf, (4, 2): inf, (5, 2): inf, (6, 2): inf, (7, 2): inf, (8, 2): inf, (9, 2): inf, (10, 2): inf, (11, 2): inf, (12, 2): inf, (13, 2): inf, (14, 2): inf, (0, 3): inf, (1, 3): inf, (2, 3): inf, (3, 3): 0, (4, 3): inf, (5, 3): inf, (6, 3): inf, (7, 3): inf, (8, 3): inf, (9, 3): inf, (10, 3): inf, (11, 3): inf, (12, 3): inf, (13, 3): inf, (14, 3): inf, (0, 4): inf, (1, 4): inf, (2, 4): inf, (3, 4): inf, (4, 4): inf, (5, 4): inf, (6, 4): 0, (7, 4): inf, (8, 4): 0, (9, 4): inf, (10, 4): inf, (11, 4): inf, (12, 4): inf, (13, 4): inf, (14, 4): inf, (0, 5): inf, (1, 5): inf, (2, 5): inf, (3, 5): inf, (4, 5): inf, (5, 5): inf, (6, 5): inf, (7, 5): 0, (8, 5): inf, (9, 5): 0, (10, 5): inf, (11, 5): inf, (12, 5): inf, (13, 5): inf, (14, 5): inf, (0, 6): inf, (1, 6): inf, (2, 6): inf, (3, 6): inf, (4, 6): inf, (5, 6): inf, (6, 6): inf, (7, 6): inf, (8, 6): inf, (9, 6): inf, (10, 6): 0, (11, 6): 0, (12, 6): 0, (13, 6): inf, (14, 6): inf, (0, 7): inf, (1, 7): inf, (2, 7): inf, (3, 7): inf, (4, 7): inf, (5, 7): inf, (6, 7): inf, (7, 7): inf, (8, 7): inf, (9, 7): inf, (10, 7): inf, (11, 7): inf, (12, 7): inf, (13, 7): 0, (14, 7): inf, (0, 8): inf, (1, 8): inf, (2, 8): inf, (3, 8): inf, (4, 8): inf, (5, 8): inf, (6, 8): inf, (7, 8): inf, (8, 8): inf, (9, 8): inf, (10, 8): inf, (11, 8): inf, (12, 8): inf, (13, 8): inf, (14, 8): 0}
# (truediv, relu):{(0, 0): 0, (1, 0): inf, (2, 0): inf, (3, 0): inf, (4, 0): inf, (5, 0): inf, (6, 0): inf, (7, 0): inf, (8, 0): inf, (0, 1): inf, (1, 1): 0, (2, 1): inf, (3, 1): inf, (4, 1): inf, (5, 1): inf, (6, 1): inf, (7, 1): inf, (8, 1): inf, (0, 2): inf, (1, 2): inf, (2, 2): 0, (3, 2): inf, (4, 2): inf, (5, 2): inf, (6, 2): inf, (7, 2): inf, (8, 2): inf, (0, 3): inf, (1, 3): inf, (2, 3): inf, (3, 3): 0, (4, 3): inf, (5, 3): inf, (6, 3): inf, (7, 3): inf, (8, 3): inf, (0, 4): inf, (1, 4): inf, (2, 4): inf, (3, 4): inf, (4, 4): 0, (5, 4): inf, (6, 4): inf, (7, 4): inf, (8, 4): inf, (0, 5): inf, (1, 5): inf, (2, 5): inf, (3, 5): inf, (4, 5): inf, (5, 5): 0, (6, 5): inf, (7, 5): inf, (8, 5): inf, (0, 6): inf, (1, 6): inf, (2, 6): inf, (3, 6): inf, (4, 6): inf, (5, 6): inf, (6, 6): 0, (7, 6): inf, (8, 6): inf, (0, 7): inf, (1, 7): inf, (2, 7): inf, (3, 7): inf, (4, 7): inf, (5, 7): inf, (6, 7): inf, (7, 7): 0, (8, 7): inf, (0, 8): inf, (1, 8): inf, (2, 8): inf, (3, 8): inf, (4, 8): inf, (5, 8): inf, (6, 8): inf, (7, 8): inf, (8, 8): 0}
# (relu, output):{(0, 0): 246019.30000000002, (1, 0): 246019.30000000002, (2, 0): 123009.1, (3, 0): 123009.1, (4, 0): 123009.1, (5, 0): 123009.1, (6, 0): 0, (7, 0): 246019.30000000002, (8, 0): 246019.30000000002}
    cost_graph = CostGraph(strategies_constructor.leaf_strategies)

    # construct all node pairs
    all_node_pairs = []

    for node in graph.nodes:
        if node.op == 'output':
            continue
        for child in node.users.keys():
            all_node_pairs.append((node, child))

    for node_pair in all_node_pairs:
        assert node_pair in cost_graph.edge_costs

    # construct merged node pairs
    merged_node_pairs = []
    node_list = list(graph.nodes)
    # add (conv1_weight, conv2d), (conv1_bias, view), (conv2d, add), (view, add), (add, output), (x, conv2d) into check node pairs
    merged_node_pairs.append((node_list[0], node_list[4]))
    merged_node_pairs.append((node_list[2], node_list[4]))
    merged_node_pairs.append((node_list[3], node_list[5]))
    merged_node_pairs.append((node_list[5], node_list[6]))
    merged_node_pairs.append((node_list[4], node_list[6]))
    merged_node_pairs.append((node_list[6], node_list[-1]))
    cost_graph.simplify_graph()
    for node_pair in all_node_pairs:
        if node_pair in merged_node_pairs:
            assert node_pair in cost_graph.edge_costs
        else:
            assert node_pair not in cost_graph.edge_costs


if __name__ == '__main__':
    test_cost_graph()
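Context note (new, not part of the removed file): a minimal sketch of the 2 x 2 device mesh used throughout these tests, plus a hedged reading of the edge_costs comments quoted above. Interpreting a key such as (0, 3) as (source-node strategy index, target-node strategy index) is an assumption drawn from those comments, not from the removed code.

import torch

physical_mesh_id = torch.arange(0, 4)
mesh_shape = (2, 2)
# Row-major reshape gives the logical mesh quoted in the comments: [[0, 1], [2, 3]].
print(physical_mesh_id.reshape(mesh_shape))

# Assumed reading of an entry such as (mul, conv1): {(0, 3): 65547.1, ...} in cost_graph.edge_costs:
# resharding from strategy 0 of `mul` to strategy 3 of `conv1` costs roughly 65547,
# and `inf` marks strategy pairs the solver must never chain together.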
@@ -1,118 +0,0 @@
import torch
from torch.fx import GraphModule
import torch.nn as nn
import pytest

from colossalai.fx.proxy import ColoProxy
from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.tensor.sharding_spec import ShardingSpec, _DimSpec
from colossalai.auto_parallel.tensor_shard.deprecated.op_handler.batch_norm_handler import BatchNormHandler
from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
from colossalai.device.device_mesh import DeviceMesh


class BNModel(nn.Module):

    def __init__(self, c):
        super().__init__()
        self.bn = nn.BatchNorm2d(c)

    def forward(self, x):
        x = x * 2
        x = self.bn(x)
        return x


def test_bn_handler():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)
    entire_shape = torch.Size((4, 16, 64, 64))

    tracer = ColoTracer()
    model = BNModel(16)
    input_sample = {'x': torch.rand(4, 16, 64, 64).to('meta')}
    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %mul : [#users=1] = call_function[target=operator.mul](args = (%x, 2), kwargs = {})
    #     %bn : [#users=1] = call_module[target=bn](args = (%mul,), kwargs = {})
    #     return bn
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()
    # [x, mul, bn, output]
    nodes = [node for node in gm.graph.nodes]

    # find the sharding strategies for the input node of the bn node
    # strategies_for_input = [[R, R, R, R], [R, S0, R, R], [R, S1, R, R], [S0, R, R, R], [S0, S1, R, R], [S1, R, R, R], [S1, S0, R, R]]
    strategies_vector_for_input = StrategiesVector(nodes[1])
    sharding_option = (None, 0, 1)
    for first_sharding_index in sharding_option:
        for second_sharding_index in sharding_option:
            if first_sharding_index is not None and second_sharding_index == first_sharding_index:
                continue
            if first_sharding_index is None:
                first_dim_spec = _DimSpec([])
            else:
                first_dim_spec = _DimSpec([first_sharding_index])

            if second_sharding_index is None:
                second_dim_spec = _DimSpec([])
            else:
                second_dim_spec = _DimSpec([second_sharding_index])

            replica_dim_spec = _DimSpec([])
            sharding_sequence = [first_dim_spec, second_dim_spec, replica_dim_spec, replica_dim_spec]
            sharding_spec = ShardingSpec(device_mesh=device_mesh,
                                         entire_shape=entire_shape,
                                         sharding_sequence=sharding_sequence)
            strategy_name = str(sharding_spec.sharding_sequence)
            sharding_strategy = ShardingStrategy(name=strategy_name, output_sharding_spec=sharding_spec)
            strategies_vector_for_input.append(sharding_strategy)
    setattr(nodes[1], 'strategies_vector', strategies_vector_for_input)

    # generate bn strategy
    strategies_vector = StrategiesVector(node=nodes[2])
    bn_handler = BatchNormHandler(
        node=nodes[2],
        device_mesh=device_mesh,
        strategies_vector=strategies_vector,
    )
    bn_handler.register_strategy()
    # ['RS0 = RS0 x S0', 'S1S0 = RS0 x S0', 'RS1 = RS1 x S1', 'S0S1 = RS1 x S1', 'RR = RR x R', 'S0R = RR x R', 'S1R = RR x R', 'S01R = RR x R', 'RS01 = RS01 x S01',
    #  'S0R = S0R x R WITH SYNC_BN', 'S1R = S1R x R WITH SYNC_BN', 'S0S1 = S0S1 x S1 WITH SYNC_BN', 'S1S0 = S1S0 x S0 WITH SYNC_BN', 'S01R = S01R x R WITH SYNC_BN']
    strategy_name_list = [strategy.name for strategy in bn_handler.strategies_vector]

    # RS = RS x S and strategies based on it, such as
    # SS = RS x S
    assert 'RS0 = RS0 x S0' in strategy_name_list
    assert 'S1S0 = RS0 x S0' in strategy_name_list
    assert 'RS1 = RS1 x S1' in strategy_name_list
    assert 'S0S1 = RS1 x S1' in strategy_name_list

    # RR = RR x R and strategies based on it, such as
    # SR = RR x R
    assert 'RR = RR x R' in strategy_name_list
    assert 'S0R = RR x R' in strategy_name_list
    assert 'S1R = RR x R' in strategy_name_list
    assert 'S01R = RR x R' in strategy_name_list

    # RS01 = RS01 x S01
    assert 'RS01 = RS01 x S01' in strategy_name_list

    # SR = SR x R WITH SYNC_BN
    assert 'S0R = S0R x R WITH SYNC_BN' in strategy_name_list
    assert 'S1R = S1R x R WITH SYNC_BN' in strategy_name_list

    # SS = SS x S WITH SYNC_BN
    assert 'S0S1 = S0S1 x S1 WITH SYNC_BN' in strategy_name_list
    assert 'S1S0 = S1S0 x S0 WITH SYNC_BN' in strategy_name_list

    # S01R = S01R x R WITH SYNC_BN
    assert 'S01R = S01R x R WITH SYNC_BN' in strategy_name_list


if __name__ == '__main__':
    test_bn_handler()
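A small sketch (new code, not from the removed file) of how the nested loop above enumerates the seven input layouts listed in its comment; 'R' means replicated and 'S0'/'S1' mean sharded along mesh dimension 0 or 1, which is how these names read throughout the deprecated tests.

sharding_option = (None, 0, 1)
layouts = []
for first in sharding_option:
    for second in sharding_option:
        if first is not None and second == first:
            continue
        # only the first two tensor dims (N, C) vary; H and W stay replicated
        layouts.append(['R' if first is None else f'S{first}',
                        'R' if second is None else f'S{second}', 'R', 'R'])
print(layouts)
# [['R', 'R', 'R', 'R'], ['R', 'S0', 'R', 'R'], ['R', 'S1', 'R', 'R'], ['S0', 'R', 'R', 'R'],
#  ['S0', 'S1', 'R', 'R'], ['S1', 'R', 'R', 'R'], ['S1', 'S0', 'R', 'R']]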
@@ -1,75 +0,0 @@
from cProfile import run

import pytest
import torch
import torch.nn as nn
from torch.fx import GraphModule

from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.testing.pytest_wrapper import run_on_environment_flag


class ConvModel(nn.Module):

    def __init__(self, c_in, c_out):
        super().__init__()
        self.conv1 = nn.Conv2d(c_in, c_out, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(c_in, c_out, kernel_size=3, padding=1, stride=2)

    def forward(self, x):
        x1 = self.conv1(x)
        x2 = x1 + 1
        x1 = torch.reshape(x1, [1, -1, 64, 1])
        x3 = self.conv2(x1)
        x3 = torch.reshape(x3, [4, 1, 64, -1])
        x = x1 + x3

        return x


@run_on_environment_flag(name='AUTO_PARALLEL')
def test_conv_handler():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)

    tracer = ColoTracer()
    model = ConvModel(16, 32)
    input_sample = {'x': torch.rand(4, 16, 64, 64).to('meta')}
    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %conv1 : [#users=2] = call_module[target=conv1](args = (%x,), kwargs = {})
    #     %add : [#users=0] = call_function[target=operator.add](args = (%conv1, 1), kwargs = {})
    #     %reshape : [#users=2] = call_function[target=torch.reshape](args = (%conv1, [1, -1, 64, 1]), kwargs = {})
    #     %conv2 : [#users=1] = call_module[target=conv2](args = (%reshape,), kwargs = {})
    #     %reshape_1 : [#users=1] = call_function[target=torch.reshape](args = (%conv2, [4, 1, 64, -1]), kwargs = {})
    #     %add_1 : [#users=1] = call_function[target=operator.add](args = (%reshape, %reshape_1), kwargs = {})
    #     return add_1
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)
    # [x, conv1, add, reshape, conv2, reshape_1, add_1, output]
    nodes = [node for node in gm.graph.nodes]
    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)

    strategies_constructor.build_strategies_and_cost()
    strategy_map = strategies_constructor.strategy_map
    # check a tensor add with a scalar case
    conv1_strategies = strategy_map[nodes[1]]
    add_strategies = strategy_map[nodes[2]]
    add_strategies_cover_list = [strategy.input_shardings[0].sharding_sequence for strategy in add_strategies]
    for strategy in conv1_strategies:
        assert strategy.output_sharding_spec.sharding_sequence in add_strategies_cover_list

    # check two tensors element-wise add case
    add_1_strategies = strategy_map[nodes[6]]
    assert len(add_1_strategies) == 25


if __name__ == '__main__':
    test_conv_handler()
@@ -1,54 +0,0 @@
import pytest
import torch
import torch.nn as nn
from torch.fx import GraphModule

from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.testing.pytest_wrapper import run_on_environment_flag


class MatmulModel(nn.Module):

    def __init__(self):
        super().__init__()

    def forward(self, x1, x2):
        x = torch.matmul(x1, x2)

        return x


@run_on_environment_flag(name='AUTO_PARALLEL')
def test_conv_handler():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)

    tracer = ColoTracer()
    model = MatmulModel()
    input_sample = {'x1': torch.rand(4, 4, 8).to('meta'), 'x2': torch.rand(4, 1, 8, 4).to('meta')}
    # graph():
    #     %x1 : torch.Tensor [#users=1] = placeholder[target=x1]
    #     %x2 : torch.Tensor [#users=1] = placeholder[target=x2]
    #     %matmul : [#users=1] = call_function[target=torch.matmul](args = (%x1, %x2), kwargs = {})
    #     return matmul
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)
    # [x1, x2, matmul, output]
    nodes = [node for node in gm.graph.nodes]
    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)

    strategies_constructor.build_strategies_and_cost()
    strategy_map = strategies_constructor.strategy_map
    matmul_strategies = strategy_map[nodes[2]]
    assert len(matmul_strategies) == 30


if __name__ == '__main__':
    test_conv_handler()
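For reference, a plain-PyTorch sketch (new code) of how the shapes in the removed matmul test broadcast, so the handler had to enumerate strategies for a batched matmul whose batch dims come from broadcasting.

import torch

x1 = torch.rand(4, 4, 8)
x2 = torch.rand(4, 1, 8, 4)
# batch dims (4,) and (4, 1) broadcast to (4, 4); the matrix product (4, 8) x (8, 4) gives (4, 4)
print(torch.matmul(x1, x2).shape)    # torch.Size([4, 4, 4, 4])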
@@ -1,90 +0,0 @@
import pytest
import torch
import torch.nn as nn
from torch.fx import GraphModule

from colossalai.auto_parallel.tensor_shard.deprecated.op_handler.conv_handler import ConvHandler
from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx.proxy import ColoProxy
from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.tensor.sharding_spec import ShardingSpec, _DimSpec


class ConvModel(nn.Module):

    def __init__(self, c_in, c_out):
        super().__init__()
        self.conv = nn.Conv2d(c_in, c_out, kernel_size=3)

    def forward(self, x):
        x = x * 2
        x = self.conv(x)
        return x


def test_conv_handler():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)
    entire_shape = torch.Size((4, 16, 64, 64))

    tracer = ColoTracer()
    model = ConvModel(16, 32)
    input_sample = {'x': torch.rand(4, 16, 64, 64).to('meta')}
    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %mul : [#users=1] = call_function[target=operator.mul](args = (%x, 2), kwargs = {})
    #     %conv_weight : [#users=1] = get_attr[target=conv.weight]
    #     %conv_bias : [#users=1] = get_attr[target=conv.bias]
    #     %conv2d : [#users=1] = call_function[target=torch.conv2d](args = (%mul, %conv_weight), kwargs = {groups: 1, dilation: (1, 1), stride: (1, 1), padding: (0, 0)})
    #     %view : [#users=1] = call_method[target=view](args = (%conv_bias, [1, -1, 1, 1]), kwargs = {})
    #     %add : [#users=1] = call_function[target=operator.add](args = (%conv2d, %view), kwargs = {})
    #     return add
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()
    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)

    strategies_constructor.build_strategies_and_cost()
    conv_node = list(graph.nodes)[4]
    # ['S0S1 = S0R x RS1', 'S1S0 = S1R x RS0', 'S0R = S0R x RR', 'S1R = S1R x RR', 'S0R = S0S1 x S1R', 'S1R = S1S0 x S0R', 'RS1 = RS0 x S0S1', 'RS0 = RS1 x S1S0', 'RR = RS0 x S0R', 'RR = RS1 x S1R', 'RS0 = RR x RS0', 'RS1 = RR x RS1', 'RR = RR x RR', 'S01R = S01R x RR', 'RR = RS01 x S01R']
    strategy_name_list = [strategy.name for strategy in conv_node.strategies_vector]

    # SS = SR x RS
    assert 'S0S1 = S0R x RS1' in strategy_name_list
    assert 'S1S0 = S1R x RS0' in strategy_name_list

    # SR = SS x SR
    assert 'S0R = S0S1 x S1R' in strategy_name_list
    assert 'S1R = S1S0 x S0R' in strategy_name_list

    # RS = RS x SS
    assert 'RS0 = RS1 x S1S0' in strategy_name_list
    assert 'RS1 = RS0 x S0S1' in strategy_name_list

    # RS = RR x RS
    assert 'RS0 = RR x RS0' in strategy_name_list
    assert 'RS1 = RR x RS1' in strategy_name_list

    # RR = RR x RR
    assert 'RR = RR x RR' in strategy_name_list

    # SR = SR x RR
    assert 'S0R = S0R x RR' in strategy_name_list
    assert 'S1R = S1R x RR' in strategy_name_list
    assert 'S01R = S01R x RR' in strategy_name_list

    # RR = RS x SR
    assert 'RR = RS0 x S0R' in strategy_name_list
    assert 'RR = RS1 x S1R' in strategy_name_list
    assert 'RR = RS01 x S01R' in strategy_name_list


if __name__ == '__main__':
    test_conv_handler()
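A hedged decoding (new, based only on the naming pattern in the asserts above) of a name such as 'S0S1 = S0R x RS1' with this test's shapes: the left pair is read here as the output's (batch, out-channel) sharding and the middle pair as the input's (batch, in-channel) sharding; the exact dimension order on the weight factor is an assumption.

# Shapes from the test: input (4, 16, 64, 64), conv weight with 32 output channels, 2 x 2 mesh.
batch, out_channels = 4, 32
mesh_dim0_size, mesh_dim1_size = 2, 2

# Under 'S0S1 = S0R x RS1' the output would be split as:
local_batch = batch // mesh_dim0_size                   # 4 / 2 = 2 samples per device along mesh dim 0
local_out_channels = out_channels // mesh_dim1_size     # 32 / 2 = 16 channels per device along mesh dim 1
print(local_batch, local_out_channels)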
@@ -1,83 +0,0 @@
import pytest
import torch
import torch.nn as nn
from torch.fx import GraphModule

from colossalai.auto_parallel.tensor_shard.deprecated.op_handler.dot_handler import DotHandler
from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx.proxy import ColoProxy
from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.tensor.sharding_spec import ShardingSpec, _DimSpec


class LinearModel(nn.Module):

    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        x = x * 2
        x = self.linear(x)
        return x


@pytest.mark.skip('F.linear is not supported in deprecated handler')
def test_dot_handler():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)
    entire_shape = torch.Size((4, 8))

    tracer = ColoTracer()
    model = LinearModel(8, 16)
    input_sample = {'x': torch.rand(4, 8).to('meta')}
    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %mul : [#users=1] = call_function[target=operator.mul](args = (%x, 2), kwargs = {})
    #     %linear_weight : [#users=1] = get_attr[target=linear.weight]
    #     %linear_bias : [#users=1] = get_attr[target=linear.bias]
    #     %linear : [#users=1] = call_function[target=torch._C._nn.linear](args = (%mul, %linear_weight), kwargs = {})
    #     %add : [#users=1] = call_function[target=operator.add](args = (%linear, %linear_bias), kwargs = {})
    #     return add
    graph = tracer.trace(root=model, meta_args=input_sample)

    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()
    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)

    strategies_constructor.build_strategies_and_cost()
    linear_node = list(graph.nodes)[4]

    # ['S0S1 = S0R x RS1', 'S1S0 = S1R x RS0', 'S0R = S0S1 x S1R', 'S1R = S1S0 x S0R', 'RS1 = RS0 x S0S1', 'RS0 = RS1 x S1S0', 'RS0 = RR x RS0', 'RS1 = RR x RS1', 'RR = RR x RR']
    strategy_name_list = [strategy.name for strategy in linear_node.strategies_vector]

    # SS = SR x RS
    assert 'S0S1 = S0R x RS1' in strategy_name_list
    assert 'S1S0 = S1R x RS0' in strategy_name_list

    # SR = SS x SR
    assert 'S0R = S0S1 x S1R' in strategy_name_list
    assert 'S1R = S1S0 x S0R' in strategy_name_list

    # RS = RS x SS
    assert 'RS0 = RS1 x S1S0' in strategy_name_list
    assert 'RS1 = RS0 x S0S1' in strategy_name_list

    # RR = RS x SR
    assert 'RR = RS0 x S0R' in strategy_name_list
    assert 'RR = RS1 x S1R' in strategy_name_list

    # RS = RR x RS
    assert 'RS0 = RR x RS0' in strategy_name_list
    assert 'RS1 = RR x RS1' in strategy_name_list


if __name__ == '__main__':
    test_dot_handler()
@@ -1,70 +0,0 @@
import torch
from torch.fx import GraphModule
import torch.nn as nn
import pytest
from colossalai.auto_parallel.tensor_shard.deprecated import sharding_strategy

from colossalai.fx.proxy import ColoProxy
from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.tensor.sharding_spec import ShardingSpec, _DimSpec
from colossalai.auto_parallel.tensor_shard.deprecated.op_handler.layer_norm_handler import LayerNormHandler
from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
from colossalai.device.device_mesh import DeviceMesh


class LNModel(nn.Module):

    def __init__(self, c):
        super().__init__()
        self.ln = nn.LayerNorm(c)

    def forward(self, x):
        x = x * 2
        x = self.ln(x)
        return x


def test_bn_handler():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)
    entire_shape = torch.Size((4, 4, 128))

    tracer = ColoTracer()
    model = LNModel(128)
    input_sample = {'x': torch.rand(4, 4, 128).to('meta')}
    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %mul : [#users=1] = call_function[target=operator.mul](args = (%x, 2), kwargs = {})
    #     %ln : [#users=1] = call_module[target=ln](args = (%mul,), kwargs = {})
    #     return ln
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()
    # [x, mul, ln, output]
    nodes = [node for node in gm.graph.nodes]
    sharding_spec_for_input = ShardingSpec(device_mesh, entire_shape, {})
    sharding_strategy_for_input = ShardingStrategy('node_1', sharding_spec_for_input)
    strategies_vector_for_input = StrategiesVector(nodes[1])
    strategies_vector_for_input.append(sharding_strategy_for_input)
    setattr(nodes[1], 'strategies_vector', strategies_vector_for_input)

    # generate ln strategy
    strategies_vector = StrategiesVector(node=nodes[2])
    ln_handler = LayerNormHandler(
        node=nodes[2],
        device_mesh=device_mesh,
        strategies_vector=strategies_vector,
    )
    ln_handler.register_strategy()
    # ['[S0, R, R] = [S0, R, R] x [R]', '[R, S0, R] = [R, S0, R] x [R]', '[S1, R, R] = [S1, R, R] x [R]', '[R, S1, R] = [R, S1, R] x [R]',
    #  '[S0, S1, R] = [S0, S1, R] x [R]', '[S1, S0, R] = [S1, S0, R] x [R]', '[S01, R, R] = [S01, R, R] x [R]', '[R, S01, R] = [R, S01, R] x [R]', 'RR = RR x R']
    strategy_name_list = [strategy.name for strategy in ln_handler.strategies_vector]

    assert len(strategy_name_list) == 9


if __name__ == '__main__':
    test_bn_handler()
@@ -1,59 +0,0 @@
import torch
import torch.nn as nn
from torch.fx import GraphModule

from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx.tracer.tracer import ColoTracer


class ConvModel(nn.Module):

    def __init__(self, c_in, c_out):
        super().__init__()
        self.conv = nn.Conv2d(c_in, c_out, kernel_size=3)

    def forward(self, x):
        x = self.conv(x)
        x = torch.flatten(x)
        return x


def test_conv_handler():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)

    tracer = ColoTracer()
    model = ConvModel(16, 32)
    input_sample = {'x': torch.rand(4, 16, 64, 64).to('meta')}
    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %conv_weight : [#users=1] = get_attr[target=conv.weight]
    #     %conv_bias : [#users=1] = get_attr[target=conv.bias]
    #     %conv2d : [#users=1] = call_function[target=torch.conv2d](args = (%x, %conv_weight), kwargs = {groups: 1, dilation: (1, 1), stride: (1, 1), padding: (0, 0)})
    #     %view : [#users=1] = call_method[target=view](args = (%conv_bias, [1, -1, 1, 1]), kwargs = {})
    #     %add : [#users=1] = call_function[target=operator.add](args = (%conv2d, %view), kwargs = {})
    #     %flatten : [#users=1] = call_function[target=torch.flatten](args = (%add,), kwargs = {})
    #     return flatten
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)
    # [x, conv_weight, conv_bias, conv2d, view, add, flatten, output]
    nodes = [node for node in gm.graph.nodes]
    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)

    strategies_constructor.build_strategies_and_cost()
    strategy_map = strategies_constructor.strategy_map
    add_strategies = strategy_map[nodes[5]]
    flatten_strategies = strategy_map[nodes[6]]
    flatten_strategies_cover_list = [strategy.input_shardings[0].sharding_sequence for strategy in flatten_strategies]
    for strategy in add_strategies:
        assert strategy.output_sharding_spec.sharding_sequence in flatten_strategies_cover_list


if __name__ == '__main__':
    test_conv_handler()
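The assert loop above (and the similar one in the two-conv reshape test earlier) checks that every output sharding a producer can emit is accepted by its consumer. A sketch of that check factored into a helper is below; it is new code and only assumes the strategy attributes already used by the removed tests.

def producer_outputs_covered(producer_strategies, consumer_strategies):
    """Return True if every output sharding of the producer appears among the
    consumer's first-input shardings, mirroring the assert pattern above."""
    cover = [s.input_shardings[0].sharding_sequence for s in consumer_strategies]
    return all(s.output_sharding_spec.sharding_sequence in cover for s in producer_strategies)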
@@ -1,66 +0,0 @@
import torch
from torch.fx import GraphModule
import torch.nn as nn
import pytest

from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.device.device_mesh import DeviceMesh
from colossalai.testing.pytest_wrapper import run_on_environment_flag


class ConvModel(nn.Module):

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.dim_in = dim_in
        self.dim_out = dim_out

    def forward(self, condition, x, y):
        output = torch.where(condition, x, y)

        return output


@run_on_environment_flag(name='AUTO_PARALLEL')
def test_where_handler():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)

    tracer = ColoTracer()
    model = ConvModel(16, 32)
    input_sample = {
        'condition': torch.rand(16, 32).to('meta'),
        'x': torch.rand(16, 32).to('meta'),
        'y': torch.rand(16, 32).to('meta')
    }
    # graph():
    #     %condition : torch.Tensor [#users=1] = placeholder[target=condition]
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %y : torch.Tensor [#users=1] = placeholder[target=y]
    #     %where : [#users=1] = call_function[target=torch.where](args = (%condition, %x, %y), kwargs = {})
    #     return where
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)

    # [condition, x, y, where, output]
    nodes = [node for node in gm.graph.nodes]
    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)

    strategies_constructor.build_strategies_and_cost()
    strategy_map = strategies_constructor.strategy_map
    # check the strategies generated for the where node
    where_node = strategy_map[nodes[3]]
    # ['[S0, S1] = [S0, S1] x [S0, S1] x [S0, S1]', '[S1, S0] = [S1, S0] x [S1, S0] x [S1, S0]', '[S01, R] = [S01, R] x [S01, R] x [S01, R]',
    #  '[R, S01] = [R, S01] x [R, S01] x [R, S01]', '[S0, R] = [S0, R] x [S0, R] x [S0, R]', '[R, S0] = [R, S0] x [R, S0] x [R, S0]',
    #  '[S1, R] = [S1, R] x [S1, R] x [S1, R]', '[R, S1] = [R, S1] x [R, S1] x [R, S1]', '[R, R] = [R, R] x [R, R] x [R, R]']
    assert len(where_node) == 9


if __name__ == '__main__':
    test_where_handler()
@@ -1,86 +0,0 @@
from functools import partial
import pytest
import torch
import torch.multiprocessing as mp
from torch.fx import GraphModule
import torch.nn as nn
import pytest
from colossalai.initialize import launch
from colossalai.utils import free_port
from colossalai.testing import rerun_if_address_is_in_use
from colossalai.logging import disable_existing_loggers
from colossalai.auto_parallel.tensor_shard.deprecated.cost_graph import CostGraph
from colossalai.auto_parallel.tensor_shard.deprecated.graph_analysis import GraphAnalyser
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor

from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx.passes.experimental.adding_shape_consistency_pass import shape_consistency_pass, solution_annotatation_pass
from colossalai.auto_parallel.tensor_shard.deprecated import Solver
from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.testing.pytest_wrapper import run_on_environment_flag


class ConvModel(nn.Module):

    def __init__(self, c_in, c_out):
        super().__init__()
        self.conv = nn.Conv2d(c_in, c_out, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        x = self.conv(x)
        return x


def check_apply(rank, world_size, port):
    disable_existing_loggers()
    launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
    input = torch.rand(4, 4, 4, 4).cuda()
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape, init_process_group=True)
    entire_shape = torch.Size((4, 4, 8, 8))

    tracer = ColoTracer()
    model = ConvModel(4, 4).cuda()
    origin_output = model(input)
    input_sample = {'x': torch.rand(4, 4, 4, 4).to('meta')}
    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %conv : [#users=1] = call_module[target=conv](args = (%x,), kwargs = {})
    #     return conv
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()
    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)
    strategies_constructor.build_strategies_and_cost()

    cost_graph = CostGraph(strategies_constructor.leaf_strategies)
    cost_graph.simplify_graph()
    graph_analyser = GraphAnalyser(gm)
    solver = Solver(gm.graph, strategies_constructor, cost_graph, graph_analyser)
    ret = solver.call_solver_serialized_args()
    solution = list(ret[0])
    sharding_spec_dict, origin_spec_dict = solution_annotatation_pass(gm, solution, device_mesh)
    shape_consistency_pass(gm)
    gm.recompile()
    nodes = [node for node in gm.graph.nodes]
    # TODO: wrap the gm to avoid the influence of the user training code
    output = gm(input, sharding_spec_dict, origin_spec_dict)
    assert output.equal(origin_output)


@run_on_environment_flag(name='AUTO_PARALLEL')
@pytest.mark.dist
@rerun_if_address_is_in_use()
def test_apply():
    world_size = 4
    run_func = partial(check_apply, world_size=world_size, port=free_port())
    mp.spawn(run_func, nprocs=world_size)


if __name__ == '__main__':
    test_apply()
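check_apply above strings the whole deprecated pipeline together; the sketch below factors those exact calls into one helper purely as a reading aid. It is new code, assumes only the deprecated APIs imported by the removed file, and is not a drop-in replacement for the test.

from torch.fx import GraphModule

from colossalai.auto_parallel.tensor_shard.deprecated import Solver
from colossalai.auto_parallel.tensor_shard.deprecated.cost_graph import CostGraph
from colossalai.auto_parallel.tensor_shard.deprecated.graph_analysis import GraphAnalyser
from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.fx.passes.experimental.adding_shape_consistency_pass import shape_consistency_pass, solution_annotatation_pass
from colossalai.fx.tracer.tracer import ColoTracer


def solve_and_apply(model, meta_args, device_mesh):
    # 1. symbolic trace on meta tensors
    tracer = ColoTracer()
    graph = tracer.trace(root=model, meta_args=meta_args)
    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()
    # 2. enumerate per-node sharding strategies and their costs
    strategies_constructor = StrategiesConstructor(graph, device_mesh, SolverOptions(fast=True))
    strategies_constructor.build_strategies_and_cost()
    # 3. pairwise resharding costs between adjacent nodes
    cost_graph = CostGraph(strategies_constructor.leaf_strategies)
    cost_graph.simplify_graph()
    # 4. the solver picks one strategy index per node
    graph_analyser = GraphAnalyser(gm)
    solver = Solver(gm.graph, strategies_constructor, cost_graph, graph_analyser)
    solution = list(solver.call_solver_serialized_args()[0])
    # 5. annotate the graph, insert shape-consistency conversions, then recompile
    sharding_spec_dict, origin_spec_dict = solution_annotatation_pass(gm, solution, device_mesh)
    shape_consistency_pass(gm)
    gm.recompile()
    return gm, sharding_spec_dict, origin_spec_dict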
@@ -1,79 +0,0 @@
from copy import deepcopy

import pytest
import torch
import torch.nn as nn
from torch.fx import GraphModule

from colossalai.auto_parallel.tensor_shard.deprecated import Solver
from colossalai.auto_parallel.tensor_shard.deprecated.cost_graph import CostGraph
from colossalai.auto_parallel.tensor_shard.deprecated.graph_analysis import GraphAnalyser
from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.tensor.shape_consistency import ShapeConsistencyManager
from colossalai.testing.pytest_wrapper import run_on_environment_flag


class ConvModel(nn.Module):

    def __init__(self, c_in, c_out):
        super().__init__()
        self.conv1 = nn.Conv2d(c_in, c_out, kernel_size=3)
        self.conv2 = nn.Conv2d(c_out, c_out, kernel_size=3)
        self.conv3 = nn.Conv2d(c_out, c_out, kernel_size=3)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x * 2
        x = self.conv1(x)
        x = self.conv2(x)
        x = x / 2
        x = self.conv3(x)
        x = self.relu(x)
        return x


@run_on_environment_flag(name='AUTO_PARALLEL')
def test_solver():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)
    shape_consistency_manager = ShapeConsistencyManager()

    tracer = ColoTracer()
    model = ConvModel(16, 32)
    input_sample = {'x': torch.rand(4, 16, 64, 64).to('meta')}

    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %mul : [#users=1] = call_function[target=operator.mul](args = (%x, 2), kwargs = {})
    #     %conv1 : [#users=1] = call_module[target=conv1](args = (%mul,), kwargs = {})
    #     %conv2 : [#users=1] = call_module[target=conv2](args = (%conv1,), kwargs = {})
    #     %truediv : [#users=1] = call_function[target=operator.truediv](args = (%conv2, 2), kwargs = {})
    #     %conv3 : [#users=1] = call_module[target=conv3](args = (%truediv,), kwargs = {})
    #     %relu : [#users=1] = call_module[target=relu](args = (%conv3,), kwargs = {})
    #     return relu
    graph = tracer.trace(root=model, meta_args=input_sample)
    gm = GraphModule(model, graph, model.__class__.__name__)

    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)
    strategies_constructor.build_strategies_and_cost()

    cost_graph = CostGraph(strategies_constructor.leaf_strategies)
    cost_graph.simplify_graph()
    graph_analyser = GraphAnalyser(gm)
    solver = Solver(gm.graph, strategies_constructor, cost_graph, graph_analyser)
    ret = solver.call_solver_serialized_args()

    # [ 0 0 13 13 13 13 13 0]
    strategies_combination_list = ret[0]
    assert solver.leaf_strategies[2][13].name == 'S01R = S01R x RR'


if __name__ == '__main__':
    test_solver()
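A sketch (new code) of how the serialized result asserted on above is read: ret[0] carries one strategy index per leaf node, ordered like strategies_constructor.leaf_strategies, so the expected vector [0, 0, 13, 13, 13, 13, 13, 0] selects strategy 13 ('S01R = S01R x RR') for every conv node. The loop mirrors the decoding already used by the other removed solver tests.

def print_chosen_strategies(ret, strategies_constructor):
    strategies_list = list(ret[0])
    nodes = [strategies_vector.node for strategies_vector in strategies_constructor.leaf_strategies]
    for index, node in enumerate(nodes):
        # one chosen ShardingStrategy per node
        print(node.name, node.strategies_vector[strategies_list[index]].name)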
@@ -1,81 +0,0 @@
import torch
from torch.fx import GraphModule
import torch.nn as nn
import pytest

from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
from colossalai.tensor.shape_consistency import ShapeConsistencyManager
from colossalai.device.device_mesh import DeviceMesh
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.auto_parallel.tensor_shard.deprecated.cost_graph import CostGraph
from copy import deepcopy
from colossalai.auto_parallel.tensor_shard.deprecated import Solver
import transformers
from colossalai.auto_parallel.tensor_shard.deprecated.constants import *
from colossalai.auto_parallel.tensor_shard.deprecated.graph_analysis import GraphAnalyser
from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.testing.pytest_wrapper import run_on_environment_flag

BATCH_SIZE = 8
SEQ_LENGHT = 8


@run_on_environment_flag(name='AUTO_PARALLEL')
def test_cost_graph():
    physical_mesh_id = torch.arange(0, 8)
    mesh_shape = (2, 4)
    # [[0, 1, 2, 3]
    # [4, 5, 6, 7]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)
    shape_consistency_manager = ShapeConsistencyManager()

    tracer = ColoTracer()
    config = transformers.GPT2Config(n_position=1024, n_layer=1, n_head=12)
    model = transformers.GPT2LMHeadModel(config=config)
    input_ids = torch.zeros((BATCH_SIZE, SEQ_LENGHT), dtype=torch.int64)
    token_type_ids = torch.zeros((BATCH_SIZE, SEQ_LENGHT), dtype=torch.int64)
    attention_mask = torch.zeros((BATCH_SIZE, SEQ_LENGHT), dtype=torch.int64)
    kwargs = dict(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)
    meta_args = {k: v.to('meta') for k, v in kwargs.items()}

    graph = tracer.trace(root=model, meta_args=meta_args)
    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()
    graph_analyser = GraphAnalyser(gm)
    liveness_list = graph_analyser.liveness_analysis()
    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)
    print(graph)
    strategies_constructor.build_strategies_and_cost()
    for check_node, strategies_vector in strategies_constructor.strategy_map.items():
        print(check_node, len(strategies_vector))
    cost_graph = CostGraph(strategies_constructor.leaf_strategies)
    cost_graph.simplify_graph()
    # solver = Solver(gm.graph, strategies_constructor, cost_graph, graph_analyser, memory_budget=1620017824.0)
    solver = Solver(gm.graph, strategies_constructor, cost_graph, graph_analyser)

    ret = solver.call_solver_serialized_args()
    print(ret)
    strategies_list = list(ret[0])
    print(strategies_list)
    computation_cost = 0
    communication_cost = 0
    memory_cost = 0
    nodes = [strategies_vector.node for strategies_vector in strategies_constructor.leaf_strategies]
    for index, node in enumerate(nodes):
        print(node.name, node.strategies_vector[strategies_list[index]].name)
        computation_cost += node.strategies_vector[strategies_list[index]].compute_cost
        communication_cost += node.strategies_vector[strategies_list[index]].communication_cost
        node_memory_cost = node.strategies_vector[strategies_list[index]].memory_cost
        if isinstance(node_memory_cost, tuple):
            node_memory_cost = node_memory_cost[0]
        memory_cost += node_memory_cost

    print(f'computation cost is {computation_cost}')
    print(f'communication cost is {communication_cost}')
    print(f'memory cost is {memory_cost}')


if __name__ == '__main__':
    test_cost_graph()
@@ -1,94 +0,0 @@
import torch
from torch.fx import GraphModule
import torch.nn as nn
import pytest

from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
from colossalai.tensor.shape_consistency import ShapeConsistencyManager
from colossalai.device.device_mesh import DeviceMesh
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.auto_parallel.tensor_shard.deprecated.cost_graph import CostGraph
from copy import deepcopy
from colossalai.auto_parallel.tensor_shard.deprecated import Solver
from torchvision.models import resnet34, resnet50
from colossalai.auto_parallel.tensor_shard.deprecated.constants import *
from colossalai.auto_parallel.tensor_shard.deprecated.graph_analysis import GraphAnalyser
from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.testing.pytest_wrapper import run_on_environment_flag


class MLP(torch.nn.Module):

    def __init__(self, dim: int):
        super().__init__()
        self.linear1 = torch.nn.Linear(dim, dim * 4)
        self.linear2 = torch.nn.Linear(dim * 4, dim)
        self.dropout = torch.nn.Dropout(0)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = self.linear1(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.linear2(x)
        return x


@run_on_environment_flag(name='AUTO_PARALLEL')
def test_cost_graph():
    physical_mesh_id = torch.arange(0, 8)
    mesh_shape = (2, 4)
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)
    shape_consistency_manager = ShapeConsistencyManager()

    tracer = ColoTracer()
    model = MLP(32)

    input_sample = {'x': torch.rand(16, 32).to('meta')}

    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %linear1 : [#users=1] = call_module[target=linear1](args = (%x,), kwargs = {})
    #     %dropout : [#users=1] = call_module[target=dropout](args = (%linear1,), kwargs = {})
    #     %relu : [#users=1] = call_module[target=relu](args = (%dropout,), kwargs = {})
    #     %linear2 : [#users=1] = call_module[target=linear2](args = (%relu,), kwargs = {})
    #     return linear2
    graph = tracer.trace(root=model, meta_args=input_sample)

    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()
    graph_analyser = GraphAnalyser(gm)
    liveness_list = graph_analyser.liveness_analysis()
    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)
    strategies_constructor.build_strategies_and_cost()

    cost_graph = CostGraph(strategies_constructor.leaf_strategies)
    cost_graph.simplify_graph()
    # megatron mode if no memory constraints
    # solver = Solver(gm.graph, strategies_constructor, cost_graph, graph_analyser)
    # all sharding on out feature dim if memory budget is not sufficient for megatron mode
    solver = Solver(gm.graph, strategies_constructor, cost_graph, graph_analyser, memory_budget=5500.0)

    ret = solver.call_solver_serialized_args()
    strategies_list = list(ret[0])
    computation_cost = 0
    communication_cost = 0
    memory_cost = 0
    for index, node in enumerate(graph.nodes):
        print(node.name, node.strategies_vector[strategies_list[index]].name)
        computation_cost += node.strategies_vector[strategies_list[index]].compute_cost
        communication_cost += node.strategies_vector[strategies_list[index]].communication_cost
        node_memory_cost = node.strategies_vector[strategies_list[index]].memory_cost
        if isinstance(node_memory_cost, tuple):
            node_memory_cost = node_memory_cost[0]
        memory_cost += node_memory_cost

    print(f'computation cost is {computation_cost}')
    print(f'communication cost is {communication_cost}')
    print(f'memory cost is {memory_cost}')


if __name__ == '__main__':
    test_cost_graph()
@@ -1,103 +0,0 @@
from copy import deepcopy

import pytest
import torch
import torch.nn as nn
from torch.fx import GraphModule

from colossalai.auto_parallel.tensor_shard.deprecated.op_handler.conv_handler import CONV_STRATEGIES_LIST
from colossalai.auto_parallel.tensor_shard.deprecated.options import SolverOptions
from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
from colossalai.auto_parallel.tensor_shard.deprecated.strategies_constructor import StrategiesConstructor
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx.proxy import ColoProxy
from colossalai.fx.tracer.tracer import ColoTracer
from colossalai.tensor.sharding_spec import ShardingSpec, _DimSpec


class ConvModel(nn.Module):

    def __init__(self, c_in, c_out):
        super().__init__()
        self.conv = nn.Conv2d(c_in, c_out, kernel_size=3)

    def forward(self, x):
        x = x * 2
        x = self.conv(x)
        return x


def test_strategies_constructor():
    physical_mesh_id = torch.arange(0, 4)
    mesh_shape = (2, 2)
    # [[0, 1]
    # [2, 3]]
    device_mesh = DeviceMesh(physical_mesh_id, mesh_shape)
    entire_shape = torch.Size((4, 16, 64, 64))

    tracer = ColoTracer()
    model = ConvModel(16, 32)
    input_sample = {'x': torch.rand(4, 16, 64, 64).to('meta')}
    # graph():
    #     %x : torch.Tensor [#users=1] = placeholder[target=x]
    #     %mul : [#users=1] = call_function[target=operator.mul](args = (%x, 2), kwargs = {})
    #     %conv_weight : [#users=1] = get_attr[target=conv.weight]
    #     %conv_bias : [#users=1] = get_attr[target=conv.bias]
    #     %conv2d : [#users=1] = call_function[target=torch.conv2d](args = (%mul, %conv_weight), kwargs = {groups: 1, dilation: (1, 1), stride: (1, 1), padding: (0, 0)})
    #     %view : [#users=1] = call_method[target=view](args = (%conv_bias, [1, -1, 1, 1]), kwargs = {})
    #     %add : [#users=1] = call_function[target=operator.add](args = (%conv2d, %view), kwargs = {})
    #     return add
    graph = tracer.trace(root=model, meta_args=input_sample)
    print(graph)
    gm = GraphModule(model, graph, model.__class__.__name__)
    gm.recompile()

    solver_options = SolverOptions(fast=True)
    strategies_constructor = StrategiesConstructor(graph, device_mesh, solver_options)

    assert strategies_constructor.leaf_strategies == []
    assert strategies_constructor.strategy_map == {}
    strategies_constructor.build_strategies_and_cost()

    # check leaf_strategies

    # In fast mode, placeholder node only has replica strategy.
    assert strategies_constructor.leaf_strategies[0][0].name == 'Replica Placeholder'

    # The second node is mul, which is an element-wise node, so its output sharding spec is the same as its input sharding spec.
    assert strategies_constructor.leaf_strategies[1][0].name == '[R, R, R, R] -> [R, R, R, R]_0'

    # The fifth node is conv.
    conv_check_list = deepcopy(CONV_STRATEGIES_LIST)
    for strategy in strategies_constructor.leaf_strategies[4]:
        conv_check_list.remove(strategy.name)
    assert len(conv_check_list) == 0

    # In fast mode, output node only has replica strategy.
    assert strategies_constructor.leaf_strategies[7][0].name == 'Replica Output'

    # check strategy_map

    nodes = [node for node in graph.nodes]
    # In fast mode, placeholder node only has replica strategy.
    x = nodes[0]
    assert strategies_constructor.strategy_map[x][0].name == 'Replica Placeholder'

    # The second node is mul, which is an element-wise node, so its output sharding spec is the same as its input sharding spec.
    mul = nodes[1]
    assert strategies_constructor.strategy_map[mul][0].name == '[R, R, R, R] -> [R, R, R, R]_0'

    # The fifth node is conv.
    conv = nodes[4]
    conv_check_list = deepcopy(CONV_STRATEGIES_LIST)
    for strategy in strategies_constructor.strategy_map[conv]:
        conv_check_list.remove(strategy.name)
    assert len(conv_check_list) == 0

    # In fast mode, output node only has replica strategy.
    output = nodes[-1]
    assert strategies_constructor.strategy_map[output][0].name == 'Replica Output'


if __name__ == '__main__':
    test_strategies_constructor()
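The deepcopy-and-remove loop above is a multiset equality check: the conv node must produce exactly the names in CONV_STRATEGIES_LIST, duplicates included. An equivalent sketch (new code) using collections.Counter:

from collections import Counter


def names_match_expected(strategies, expected_names):
    return Counter(strategy.name for strategy in strategies) == Counter(expected_names)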