mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-07 12:01:39 +00:00
[misc] refactor launch API and tensor constructor (#5666)
* [misc] remove config arg from initialize * [misc] remove old tensor constructor * [plugin] add npu support for ddp * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [devops] fix doc test ci * [test] fix test launch * [doc] update launch doc --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -20,7 +20,7 @@ def _benchmark(rank, world_size, port):
|
||||
only result in minor performance drop. So at last we might be able to find better training batch size for our
|
||||
model (combine with large batch training optimizer such as LAMB).
|
||||
"""
|
||||
colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
model = tm.resnet152()
|
||||
gm = symbolic_trace(model)
|
||||
raw_graph = deepcopy(gm.graph)
|
||||
|
@@ -17,7 +17,7 @@ def _benchmark(rank, world_size, port, args):
|
||||
The benchmark will sample in a range of memory budget for each model and output the benchmark summary and
|
||||
data visualization of peak memory vs. budget memory and relative step time vs. peak memory.
|
||||
"""
|
||||
colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
|
||||
if args.model == "resnet50":
|
||||
model = tm.resnet50()
|
||||
data_gen = partial(data_gen_resnet, batch_size=128, shape=(3, 224, 224))
|
||||
|
@@ -128,7 +128,7 @@ def main():
|
||||
# ==============================
|
||||
# Launch Distributed Environment
|
||||
# ==============================
|
||||
colossalai.launch_from_torch(config={})
|
||||
colossalai.launch_from_torch()
|
||||
coordinator = DistCoordinator()
|
||||
|
||||
# update the learning rate with linear scaling
|
||||
|
@@ -148,7 +148,7 @@ def main():
|
||||
# ==============================
|
||||
# Launch Distributed Environment
|
||||
# ==============================
|
||||
colossalai.launch_from_torch(config={})
|
||||
colossalai.launch_from_torch()
|
||||
coordinator = DistCoordinator()
|
||||
|
||||
# update the learning rate with linear scaling
|
||||
|
@@ -125,7 +125,7 @@ def main():
|
||||
# ==============================
|
||||
# Launch Distributed Environment
|
||||
# ==============================
|
||||
colossalai.launch_from_torch(config={}, seed=42)
|
||||
colossalai.launch_from_torch(seed=42)
|
||||
coordinator = DistCoordinator()
|
||||
|
||||
# local_batch_size = BATCH_SIZE // coordinator.world_size
|
||||
|
@@ -289,7 +289,7 @@ class DummyDataloader:
|
||||
def main():
|
||||
args = parse_args()
|
||||
disable_existing_loggers()
|
||||
colossalai.legacy.launch_from_torch(config=dict())
|
||||
colossalai.legacy.launch_from_torch()
|
||||
logger = get_dist_logger()
|
||||
is_main_process = dist.get_rank() == 0
|
||||
|
||||
|
Reference in New Issue
Block a user