mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-01 17:17:05 +00:00
[example] add llama2 example (#4527)
* [example] transfer llama-1 example
* [example] fit llama-2
* [example] refactor scripts folder
* [example] fit new gemini plugin
* [cli] fix multinode runner
* [example] fit gemini optim checkpoint
* [example] refactor scripts
* [example] update requirements
* [example] update requirements
* [example] rename llama to llama2
* [example] update readme and pretrain script
* [example] refactor scripts
This commit is contained in:
@@ -265,6 +265,10 @@ def launch_multi_processes(args: Config) -> None:
     # establish remote connection
     runner.connect(host_info_list=active_device_pool, workdir=curr_path, env=env)

+    # overwrite master addr when num_nodes > 1 and not specified
+    if len(active_device_pool) > 1 and args.master_addr == "127.0.0.1":
+        args.master_addr = active_device_pool.hostinfo_list[0].hostname
+
     # execute distributed launching command
     for node_id, hostinfo in enumerate(active_device_pool):
         cmd = get_launch_command(master_addr=args.master_addr,
Reference in New Issue
Block a user