mirror of
				https://github.com/hpcaitech/ColossalAI.git
				synced 2025-11-04 07:58:42 +00:00 
			
		
		
		
	[doc] improved error messages in initialize (#872)
This commit is contained in:
		@@ -138,8 +138,14 @@ def launch_from_slurm(config: Union[str, Path, Config, Dict],
 | 
				
			|||||||
        seed (int, optional): Specified random seed for every process. Defaults to 1024.
 | 
					        seed (int, optional): Specified random seed for every process. Defaults to 1024.
 | 
				
			||||||
        verbose (bool, optional): Whether to print logs. Defaults to True.
 | 
					        verbose (bool, optional): Whether to print logs. Defaults to True.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    rank = int(os.environ['SLURM_PROCID'])
 | 
					    try:
 | 
				
			||||||
    world_size = int(os.environ['SLURM_NPROCS'])
 | 
					        rank = int(os.environ['SLURM_PROCID'])
 | 
				
			||||||
 | 
					        world_size = int(os.environ['SLURM_NPROCS'])
 | 
				
			||||||
 | 
					    except KeyError as e:
 | 
				
			||||||
 | 
					        raise RuntimeError(
 | 
				
			||||||
 | 
					            f"Could not find {e} in the SLURM environment, visit https://www.colossalai.org/ for more information on launching with SLURM"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    launch(config=config,
 | 
					    launch(config=config,
 | 
				
			||||||
           rank=rank,
 | 
					           rank=rank,
 | 
				
			||||||
           world_size=world_size,
 | 
					           world_size=world_size,
 | 
				
			||||||
@@ -167,9 +173,15 @@ def launch_from_openmpi(config: Union[str, Path, Config, Dict],
 | 
				
			|||||||
        seed (int, optional): Specified random seed for every process. Defaults to 1024.
 | 
					        seed (int, optional): Specified random seed for every process. Defaults to 1024.
 | 
				
			||||||
        verbose (bool, optional): Whether to print logs. Defaults to True.
 | 
					        verbose (bool, optional): Whether to print logs. Defaults to True.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
 | 
					    try:
 | 
				
			||||||
    local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
 | 
					        rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
 | 
				
			||||||
    world_size = int(os.environ['OMPI_COMM_WORLD_SIZE'])
 | 
					        local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
 | 
				
			||||||
 | 
					        world_size = int(os.environ['OMPI_COMM_WORLD_SIZE'])
 | 
				
			||||||
 | 
					    except KeyError as e:
 | 
				
			||||||
 | 
					        raise RuntimeError(
 | 
				
			||||||
 | 
					            f"Could not find {e} in the OpenMPI environment, visit https://www.colossalai.org/ for more information on launching with OpenMPI"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    launch(config=config,
 | 
					    launch(config=config,
 | 
				
			||||||
           local_rank=local_rank,
 | 
					           local_rank=local_rank,
 | 
				
			||||||
           rank=rank,
 | 
					           rank=rank,
 | 
				
			||||||
@@ -194,11 +206,17 @@ def launch_from_torch(config: Union[str, Path, Config, Dict],
 | 
				
			|||||||
        seed (int, optional): Specified random seed for every process. Defaults to 1024.
 | 
					        seed (int, optional): Specified random seed for every process. Defaults to 1024.
 | 
				
			||||||
        verbose (bool, optional): Whether to print logs. Defaults to True.
 | 
					        verbose (bool, optional): Whether to print logs. Defaults to True.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    rank = int(os.environ['RANK'])
 | 
					    try:
 | 
				
			||||||
    local_rank = int(os.environ['LOCAL_RANK'])
 | 
					        rank = int(os.environ['RANK'])
 | 
				
			||||||
    world_size = int(os.environ['WORLD_SIZE'])
 | 
					        local_rank = int(os.environ['LOCAL_RANK'])
 | 
				
			||||||
    host = os.environ['MASTER_ADDR']
 | 
					        world_size = int(os.environ['WORLD_SIZE'])
 | 
				
			||||||
    port = int(os.environ['MASTER_PORT'])
 | 
					        host = os.environ['MASTER_ADDR']
 | 
				
			||||||
 | 
					        port = int(os.environ['MASTER_PORT'])
 | 
				
			||||||
 | 
					    except KeyError as e:
 | 
				
			||||||
 | 
					        raise RuntimeError(
 | 
				
			||||||
 | 
					            f"Could not find {e} in the torch environment, visit https://www.colossalai.org/ for more information on launching with torch"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    launch(config=config,
 | 
					    launch(config=config,
 | 
				
			||||||
           local_rank=local_rank,
 | 
					           local_rank=local_rank,
 | 
				
			||||||
           rank=rank,
 | 
					           rank=rank,
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user