[misc] refactor launch API and tensor constructor (#5666)

* [misc] remove config arg from initialize

* [misc] remove old tensor constructor

* [plugin] add npu support for ddp

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [devops] fix doc test ci

* [test] fix test launch

* [doc] update launch doc

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Hongxin Liu
2024-04-29 10:40:11 +08:00
committed by GitHub
parent 91fa553775
commit 7f8b16635b
223 changed files with 294 additions and 403 deletions

View File

@@ -77,7 +77,7 @@ def run_naive_amp():
def run_dist(rank, world_size, port):
colossalai.legacy.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost")
colossalai.legacy.launch(rank=rank, world_size=world_size, port=port, host="localhost")
run_naive_amp()

View File

@@ -76,7 +76,7 @@ def run_torch_amp():
def run_dist(rank, world_size, port):
colossalai.legacy.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost")
colossalai.legacy.launch(rank=rank, world_size=world_size, port=port, host="localhost")
run_torch_amp()

View File

@@ -16,7 +16,7 @@ torch.manual_seed(123)
def check_layer(rank, world_size, port):
disable_existing_loggers()
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl", verbose=False)
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl", verbose=False)
rank = gpc.get_local_rank(ParallelMode.PIPELINE)
if rank == 0:

View File

@@ -48,7 +48,7 @@ def check_all_reduce():
def check_layer(rank, world_size, port):
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
assert dist.get_rank() == gpc.get_global_rank()
print("Rank {} / {}".format(dist.get_rank(), dist.get_world_size()))

View File

@@ -88,7 +88,7 @@ def check_send_recv_forward_backward():
def check_layer(rank, world_size, port):
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
check_send_recv_forward()
check_send_recv_backward()
check_send_recv_forward_backward()

View File

@@ -104,7 +104,7 @@ def check_small_pipeline():
def check_layer(rank, world_size, port):
disable_existing_loggers()
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
disable_existing_loggers()
# check_send_recv_forward()

View File

@@ -17,7 +17,7 @@ CONFIG = dict(
def check_layer(rank, world_size, port):
disable_existing_loggers()
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
check_linear_col()
check_linear_row()

View File

@@ -50,7 +50,7 @@ def check_layer():
def check_layer_and_operation(rank, world_size, port):
disable_existing_loggers()
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False

View File

@@ -38,7 +38,7 @@ def check_layer():
def check_layer_and_operation(rank, world_size, port):
disable_existing_loggers()
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False

View File

@@ -44,7 +44,7 @@ def check_layer():
def check_layer_and_operation(rank, world_size, port):
disable_existing_loggers()
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
torch.backends.cudnn.deterministic = True

View File

@@ -378,7 +378,7 @@ def run_parallel_freq_aware_embed_columnwise(rank, world_size):
def run_dist(rank, world_size, port):
colossalai.legacy.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
colossalai.legacy.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
# run_parallel_freq_aware_embed_columnwise(rank, world_size)
run_parallel_freq_aware_embed_tablewise(rank, world_size)

View File

@@ -48,7 +48,7 @@ def check_mem():
def run_dist(rank, world_size, port):
colossalai.legacy.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
colossalai.legacy.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
check_mem()
run()

View File

@@ -9,7 +9,7 @@ from colossalai.testing import free_port
@pytest.mark.skip
def test_multiinheritance():
colossalai.legacy.launch(config={}, rank=0, world_size=1, host="localhost", port=free_port(), backend="nccl")
colossalai.legacy.launch(rank=0, world_size=1, host="localhost", port=free_port(), backend="nccl")
colo_param = ColoParameter(None, requires_grad=True)
assert colo_param.dist_spec.placement.value == "r"
assert isinstance(colo_param, ColoTensor)

View File

@@ -86,7 +86,7 @@ def check_comm(size, rank, prev_rank, next_rank, logger):
def run_check(rank, world_size, port):
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
logger = get_dist_logger()
rank = gpc.get_global_rank()
prev_rank = gpc.get_prev_global_rank(ParallelMode.PIPELINE)

View File

@@ -23,7 +23,7 @@ CONFIG = dict(NUM_MICRO_BATCHES=2, parallel=dict(pipeline=dict(size=2), tensor=d
def run_schedule(rank, world_size, port):
launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
# build model
model = resnet18(num_classes=10)

View File

@@ -43,7 +43,7 @@ def check_checkpoint_1d(rank, world_size, port):
)
disable_existing_loggers()
launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
m1 = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 4))
sd1 = m1.state_dict()

View File

@@ -43,7 +43,7 @@ def check_checkpoint_2d(rank, world_size, port):
)
disable_existing_loggers()
launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
m1 = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 4))
sd1 = m1.state_dict()

View File

@@ -43,7 +43,7 @@ def check_checkpoint_2p5d(rank, world_size, port):
)
disable_existing_loggers()
launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
m1 = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 4))
sd1 = m1.state_dict()

View File

@@ -43,7 +43,7 @@ def check_checkpoint_3d(rank, world_size, port):
)
disable_existing_loggers()
launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
m1 = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 4))
sd1 = m1.state_dict()

View File

@@ -14,7 +14,7 @@ def _run_colo_set_process_memory_fraction_and_colo_device_memory_capacity():
def run_dist(rank, world_size, port):
colossalai.legacy.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
colossalai.legacy.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
_run_colo_set_process_memory_fraction_and_colo_device_memory_capacity()

View File

@@ -62,7 +62,7 @@ def run_grad_clip_norm(world_size: int, dtype: torch.dtype, device: str, norm_ty
def run_dist(rank, world_size, port):
disable_existing_loggers()
colossalai.legacy.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
colossalai.legacy.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
run_grad_clip_norm(world_size=world_size)

View File

@@ -7,7 +7,7 @@ from colossalai.testing import rerun_if_address_is_in_use, spawn
def run_tensor_move(rank, world_size, port):
colossalai.legacy.launch(config={}, rank=0, world_size=world_size, host="localhost", port=port, backend="nccl")
colossalai.legacy.launch(rank=0, world_size=world_size, host="localhost", port=port, backend="nccl")
src_t = torch.ones(2, 3).cuda()
tgt_t = torch.zeros(2, 3)