Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-07 03:52:01 +00:00)
[misc] refactor launch API and tensor constructor (#5666)
* [misc] remove config arg from initialize
* [misc] remove old tensor constructor
* [plugin] add npu support for ddp
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* [devops] fix doc test ci
* [test] fix test launch
* [doc] update launch doc

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
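The user-facing change is that the config dict is no longer passed to colossalai.launch, launch_from_torch, or launch_from_slurm. Below is a minimal sketch of a post-refactor entry point, assuming the script is started with torchrun; the logging call mirrors the examples touched in this commit, the rest is illustrative:

import colossalai
from colossalai.logging import get_dist_logger


def main():
    # After this commit only runtime options (seed, backend, ...) are passed; no config dict.
    colossalai.launch_from_torch(seed=42)
    logger = get_dist_logger()
    logger.info("distributed environment initialized", ranks=[0])


if __name__ == "__main__":
    main()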
@@ -94,8 +94,7 @@ def train_gpt(args):
 def run(rank, world_size, port, args):
-    config = {}
-    colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
+    colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
     train_gpt(args)
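For launchers that call colossalai.launch directly, only the config argument goes away; rank, world size, host, and port are still supplied by the caller. As a hedged illustration that is not part of the diff, a run function with this signature is commonly driven by torch.multiprocessing.spawn, which injects the rank as the first argument:

import torch.multiprocessing as mp


def spawn_workers(world_size, port, args):
    # Calls run(rank, world_size, port, args) once per process, with rank in 0..world_size-1.
    mp.spawn(run, args=(world_size, port, args), nprocs=world_size)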
@@ -47,7 +47,7 @@ def get_data(batch_size, seq_len, vocab_size):

 def main():
     disable_existing_loggers()
-    launch_from_torch(config={})
+    launch_from_torch()
     logger = get_dist_logger()
     config = transformers.GPT2Config(n_position=SEQ_LENGTH, n_layer=NUM_LAYERS, n_head=NUM_HEADS, n_embd=HIDDEN_DIM)
     if FP16:
@@ -132,7 +132,7 @@ def main():
     PROF_FLAG = False  # The flag of profiling, False by default

     disable_existing_loggers()
-    colossalai.launch_from_torch(config={})
+    colossalai.launch_from_torch()

     logger = get_dist_logger()
     logger.info(f"{args.model_type}, {args.distplan}, batch size {BATCH_SIZE}", ranks=[0])
@@ -67,7 +67,7 @@ def main():
     parser.add_argument("--cpu_offload", action="store_true", help="Use gradient checkpointing")
     args = parser.parse_args()

-    colossalai.launch_from_torch({})
+    colossalai.launch_from_torch()
     coordinator = DistCoordinator()

     def empty_init():
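Scripts that construct a DistCoordinator right after launching only lose the dict argument; the coordinator itself is unchanged by this commit. A hedged sketch of the surrounding pattern, with helper names taken from colossalai.cluster.DistCoordinator and to be treated as assumptions for other versions:

import colossalai
from colossalai.cluster import DistCoordinator

colossalai.launch_from_torch()
coordinator = DistCoordinator()
coordinator.print_on_master("launch complete")  # assumed helper: prints from rank 0 only
if coordinator.is_master():
    pass  # rank-0-only work such as writing logs or saving checkpoints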
@@ -196,7 +196,7 @@ def main():
     # ==============================
     # Launch Distributed Environment
     # ==============================
-    colossalai.launch_from_torch(config={}, seed=42)
+    colossalai.launch_from_torch(seed=42)
     coordinator = DistCoordinator()

     # local_batch_size = BATCH_SIZE // coordinator.world_size
@@ -36,9 +36,9 @@ def main():
     args = parser.parse_args()
     disable_existing_loggers()
     if args.from_torch:
-        colossalai.launch_from_torch(config=args.config)
+        colossalai.launch_from_torch()
     else:
-        colossalai.launch_from_slurm(config=args.config, host=args.host, port=29500, seed=42)
+        colossalai.launch_from_slurm(host=args.host, port=29500, seed=42)
     logger = get_dist_logger()

     data_path = None if args.use_dummy_dataset else os.environ["DATA"]
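With config removed from both code paths, rank and world size come entirely from the environment: launch_from_torch reads the variables set by torchrun, while launch_from_slurm reads the SLURM ones plus an explicit host and port. A hedged sketch of dispatching between the two; the environment variable names are the standard torchrun/SLURM ones and should be checked against the installed ColossalAI version:

import os

import colossalai

if "RANK" in os.environ:
    # Started via torchrun: RANK, WORLD_SIZE, MASTER_ADDR, MASTER_PORT are already set.
    colossalai.launch_from_torch(seed=42)
elif "SLURM_PROCID" in os.environ:
    # Started via srun: rank and world size come from SLURM; the master host is passed
    # explicitly (a hypothetical placeholder here; the touched example takes it from --host).
    master_host = "node001"
    colossalai.launch_from_slurm(host=master_host, port=29500, seed=42)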