Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-06 03:20:52 +00:00)
[tensor] add 1D device mesh (#1492)
@@ -133,13 +133,36 @@ def check_all_reduce(device_mesh, rank):

    # device_mesh_shape: (2, 2)
    sharding_spec = ShardingSpec(device_mesh, tensor_to_comm.shape, dim_partition_dict=dim_partition_dict)

    # CommSpec:(comm_pattern:all_reduce, logical_process_axis:0)
    comm_spec = CommSpec(CollectiveCommPattern.ALLREDUCE, sharding_spec, logical_process_axis=0)
    comm_spec.covert_spec_to_action(tensor_to_comm)

    assert tensor_to_comm.equal(tensor_to_check)
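
The all_reduce along logical_process_axis=0 sums tensors across the ranks that share a position on the other mesh axis. A minimal CPU-only sketch of that grouping, assuming an illustrative (2, 2) mesh layout [[0, 1], [2, 3]] (the layout and group derivation here are assumptions for illustration, not ColossalAI internals):

import torch

# Assumed physical layout of the (2, 2) device mesh: rank = mesh[i][j].
mesh = [[0, 1], [2, 3]]

# Each rank starts with torch.ones(2, 2) * rank, as in the test above.
tensors = {rank: torch.ones(2, 2) * rank for row in mesh for rank in row}

# All-reduce along logical axis 0 reduces over the rows of the mesh,
# i.e. over the ranks in each column: groups {0, 2} and {1, 3}.
for col in range(2):
    group = [mesh[row][col] for row in range(2)]
    reduced = sum(tensors[r] for r in group)    # the all-reduce sum
    for r in group:
        tensors[r] = reduced.clone()            # every group member gets the sum

assert tensors[0].equal(torch.full((2, 2), 2.0))    # 0 + 2
assert tensors[1].equal(torch.full((2, 2), 4.0))    # 1 + 3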

def check_all_reduce_in_flatten_device_mesh(device_mesh, rank):
    # tensor to comm
    tensor_to_comm = torch.ones(2, 2).cuda() * rank

    # reduce through logical process axis 0 of the flattened device mesh
    # tensor to check
    # tensor([[6., 6.],
    #         [6., 6.]])
    tensor_to_check = torch.tensor([[6, 6], [6, 6]], dtype=tensor_to_comm.dtype).cuda()

    dim_partition_dict = {}
    # DistSpec:
    #     shard_sequence: R,R
    #     device_mesh_shape: (2, 2)
    sharding_spec = ShardingSpec(device_mesh, tensor_to_comm.shape, dim_partition_dict=dim_partition_dict)

    # CommSpec:(comm_pattern:all_reduce, logical_process_axis:[0, 1])
    comm_spec = CommSpec(CollectiveCommPattern.ALLREDUCE, sharding_spec, logical_process_axis=[0, 1])
    comm_spec.covert_spec_to_action(tensor_to_comm)

    assert tensor_to_comm.equal(tensor_to_check)
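
With logical_process_axis=[0, 1] the (2, 2) mesh is flattened into a single 1D group, so the all_reduce sums over all four ranks: 0 + 1 + 2 + 3 = 6 in every entry, which is exactly tensor_to_check. A quick CPU check of that arithmetic, with no distributed setup:

import torch

world_size = 4
# Each rank holds torch.ones(2, 2) * rank; all-reducing over the flattened
# mesh sums the contributions of all four ranks.
result = sum(torch.ones(2, 2) * rank for rank in range(world_size))
assert result.equal(torch.full((2, 2), 6.0))    # 0 + 1 + 2 + 3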

def check_comm(rank, world_size, port):
    disable_existing_loggers()
    launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')

@@ -162,6 +185,9 @@ def check_comm(rank, world_size, port):

    # test all reduce
    check_all_reduce(device_mesh, rank)

    # test all reduce in the 1D flattened device mesh
    check_all_reduce_in_flatten_device_mesh(device_mesh, rank)
    gpc.destroy()
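
These checks run one process per device in the mesh. A hypothetical driver for them, using torch.multiprocessing (the spawn wrapper and the fixed port 29500 are illustrative assumptions, not the repository's actual test harness):

from functools import partial

import torch.multiprocessing as mp

def main():
    world_size = 4    # one process per device in the (2, 2) mesh
    # mp.spawn passes the process index as the first positional argument,
    # which check_comm takes as its rank.
    mp.spawn(partial(check_comm, world_size=world_size, port=29500), nprocs=world_size)

if __name__ == '__main__':
    main()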

@@ -64,7 +64,6 @@ def check_apply(rank, world_size, port):

    tensor_to_comm.sharding_spec = sharding_spec_source
    shape_consistency_manager.apply(tensor_to_comm, sharding_spec_target)
    print(tensor_to_comm)
    assert tensor_to_comm.equal(tensor_to_check)
    assert str(tensor_to_comm.sharding_spec.sharding_sequence) == str(sharding_spec_target.sharding_sequence)
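
shape_consistency_manager.apply rewrites the tensor from its source sharding spec to the target spec by executing the necessary communication actions. A toy single-process illustration of the idea behind one such conversion, going from a layout sharded along dim 0 (S0,R) to a replicated layout (R,R) via an all-gather (a simulation of the concept, not the manager's implementation):

import torch

full = torch.arange(16).reshape(4, 4)

# Source layout S0,R on two "devices": each holds half of the rows.
shards = [full[0:2], full[2:4]]

# Target layout R,R: after an all-gather along dim 0, every device
# holds the full tensor again.
gathered = torch.cat(shards, dim=0)
assert gathered.equal(full)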