mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-11 05:49:55 +00:00
[misc] refactor launch API and tensor constructor (#5666)
* [misc] remove config arg from initialize * [misc] remove old tensor constructor * [plugin] add npu support for ddp * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [devops] fix doc test ci * [test] fix test launch * [doc] update launch doc --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -124,7 +124,7 @@ def check_all_reduce_bwd(process_groups_dict, rank):
|
||||
|
||||
def check_comm(rank, world_size, port):
|
||||
disable_existing_loggers()
|
||||
launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
|
||||
physical_mesh_id = torch.arange(0, 4)
|
||||
assert rank == dist.get_rank()
|
||||
|
@@ -21,7 +21,7 @@ class TestModel(torch.nn.Module):
|
||||
|
||||
def check_dtensor(rank, world_size, port):
|
||||
disable_existing_loggers()
|
||||
launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
test_model = TestModel(8, 8).to("cuda")
|
||||
original_tensor = torch.rand(4, 8).to("cuda")
|
||||
compare_output = test_model(original_tensor)
|
||||
|
@@ -20,7 +20,7 @@ mesh_shape = (2, 2)
|
||||
|
||||
def check_one_step_transform(rank, world_size, port):
|
||||
disable_existing_loggers()
|
||||
launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
# [[0, 1],
|
||||
# [2, 3]]
|
||||
device_mesh = DeviceMesh(physical_mesh_id, mesh_shape, init_process_group=True)
|
||||
@@ -82,7 +82,7 @@ def check_one_step_transform(rank, world_size, port):
|
||||
|
||||
def check_layout_converting(rank, world_size, port):
|
||||
disable_existing_loggers()
|
||||
launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
dim_partition_source = {1: [0, 1]}
|
||||
dim_partition_target = {0: [0, 1]}
|
||||
device_mesh = DeviceMesh(physical_mesh_id, mesh_shape, init_process_group=True)
|
||||
@@ -141,7 +141,7 @@ def check_layout_converting(rank, world_size, port):
|
||||
|
||||
def check_layout_converting_apply(rank, world_size, port):
|
||||
disable_existing_loggers()
|
||||
launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
|
||||
dim_partition_source = {1: [0, 1]}
|
||||
dim_partition_target = {0: [0, 1]}
|
||||
|
Reference in New Issue
Block a user