mirror of https://github.com/hpcaitech/ColossalAI.git
Fixed docstring in colossalai (#171)
@@ -29,12 +29,12 @@ from colossalai.global_variables import moe_env


 def get_default_parser():
-    '''Reads user command line and uses an argument parser to parse the input arguments.
+    """Reads user command line and uses an argument parser to parse the input arguments.
     Input arguments include configuration, host, port, world size, local rank, backend for torch.distributed.

-    :return: returns the parser with the default arguments, the user may add customized arguments into this parser
+    :return: Returns the parser with the default arguments, the user may add customized arguments into this parser
     :rtype: Namespace
-    '''
+    """
     parser = argparse.ArgumentParser()
     parser.add_argument('--config', type=str, help='path to the config file')
     parser.add_argument('--host',
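For orientation, here is a minimal usage sketch of the parser documented above; the extra --batch_size argument is purely illustrative and not part of this commit:

    import colossalai

    # The parser comes pre-populated with --config, --host, --port, world size, local rank and backend.
    parser = colossalai.get_default_parser()
    # As the docstring notes, users may add customized arguments on top of the defaults.
    parser.add_argument('--batch_size', type=int, default=32, help='illustrative custom argument')
    args = parser.parse_args()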
@@ -64,28 +64,30 @@ def launch(config: Union[str, Path, Config, Dict],
            local_rank: int = None,
            seed: int = 1024,
            verbose: bool = True):
-    '''This function first parses the configuration arguments, using :func:parse_args() in case one of the input arguments are not given.
-    Then initialize and set distributed environment by calling global_context's functions.
+    """This function first parses the configuration arguments, using :func:`parse_args()` in case one of the input
+    arguments are not given. Then initialize and set distributed environment by calling global_context's functions.

-    :param config: config file or config file path are both acceptable
+    :param config: Config file or config file path are both acceptable
     :type config: Union[str, dict, Config]
-    :param rank: rank for the default process group
+    :param rank: Rank for the default process group
     :type rank: int
-    :param world_size: world size of the default process group
+    :param world_size: World size of the default process group
     :type world_size: int
-    :param host: the master address for distributed training
+    :param host: The master address for distributed training
     :type host: str
-    :param port: the master port for distributed training
+    :param port: The master port for distributed training
     :type port: str
-    :param backend: backend for torch.distributed
-    :type backend: str
-    :param local_rank: rank for the process on the node and is used to set the default CUDA device,
-    defaults to None. If local_rank = None, the default device ordinal will be calculated automatically
+    :param backend: Backend for torch.distributed
+    :type backend: str, optional
+    :param local_rank: Rank for the process on the node and is used to set the default CUDA device, defaults to None.
+        If local_rank = None, the default device ordinal will be calculated automatically
     :type local_rank: int, optional
-    :param verbose: whether to print logs
-    :type verbose: bool
-    :raises Exception: raise exception when config type is wrong
-    '''
+    :param seed: Specified random seed for every processes
+    :type seed: int, optional
+    :param verbose: Whether to print logs
+    :type verbose: bool, optional
+    :raises Exception: Raise exception when config type is wrong
+    """
     gpc.verbose = verbose

     # set config
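As a usage illustration of the signature documented above, a hedged sketch of calling launch directly with an explicit rank and world size; the config path, address and port are placeholder values that a real launcher would normally supply:

    import colossalai

    # Single-process example; in practice rank/world_size come from the process launcher.
    colossalai.launch(config='./config.py',
                      rank=0,
                      world_size=1,
                      host='127.0.0.1',
                      port=29500,
                      backend='nccl',
                      seed=1024,
                      verbose=True)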
@@ -123,20 +125,22 @@ def launch_from_slurm(config: Union[str, Path, Config, Dict],
                       backend: str = 'nccl',
                       seed: int = 1024,
                       verbose: bool = True):
-    '''A wrapper for colossalai.launch for SLURM launcher by reading rank and world size from the environment variables
+    """A wrapper for colossalai.launch for SLURM launcher by reading rank and world size from the environment variables
     set by SLURM

-    :param config: config file or config file path are both acceptable
+    :param config: Config file or config file path are both acceptable
     :type config: Union[str, dict, Config]
-    :param host: the master address for distributed training
+    :param host: The master address for distributed training
     :type host: str
-    :param port: the master port for distributed training
+    :param port: The master port for distributed training
     :type port: str
-    :param backend: backend for torch.distributed
-    :type backend: str
-    :param verbose: whether to print logs
-    :type verbose: bool
-    '''
+    :param backend: Backend for torch.distributed
+    :type backend: str, optional
+    :param seed: Specified random seed for every processes
+    :type seed: int, optional
+    :param verbose: Whether to print logs
+    :type verbose: bool, optional
+    """
     rank = int(os.environ['SLURM_PROCID'])
     world_size = int(os.environ['SLURM_NPROCS'])
     launch(config=config,
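A hedged sketch of how the SLURM wrapper above is typically driven; the script name and master address are placeholders, and srun is assumed to export SLURM_PROCID and SLURM_NPROCS for every task:

    # train.py, started with e.g.:  srun python train.py --host <node0-addr> --port 29500
    import colossalai

    args = colossalai.get_default_parser().parse_args()
    # rank and world_size are read from SLURM_PROCID / SLURM_NPROCS inside the wrapper.
    colossalai.launch_from_slurm(config=args.config,
                                 host=args.host,
                                 port=args.port,
                                 backend='nccl',
                                 seed=1024)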
@@ -155,20 +159,22 @@ def launch_from_openmpi(config: Union[str, Path, Config, Dict],
                         backend: str = 'nccl',
                         seed: int = 1024,
                         verbose: bool = True):
-    '''A wrapper for colossalai.launch for OpenMPI launcher by reading rank and world size from the environment variables
+    """A wrapper for colossalai.launch for OpenMPI launcher by reading rank and world size from the environment variables
     set by OpenMPI

-    :param config: config file or config file path are both acceptable
+    :param config: Config file or config file path are both acceptable
     :type config: Union[str, dict, Config]
-    :param host: the master address for distributed training
+    :param host: The master address for distributed training
     :type host: str
-    :param port: the master port for distributed training
+    :param port: The master port for distributed training
     :type port: str
-    :param backend: backend for torch.distributed
-    :type backend: str
-    :param verbose: whether to print logs
-    :type verbose: bool
-    '''
+    :param backend: Backend for torch.distributed
+    :type backend: str, optional
+    :param seed: Specified random seed for every processes
+    :type seed: int, optional
+    :param verbose: Whether to print logs
+    :type verbose: bool, optional
+    """
    rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
     local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
     world_size = int(os.environ['OMPI_COMM_WORLD_SIZE'])
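The OpenMPI wrapper differs only in the environment variables it consumes (OMPI_COMM_WORLD_RANK, OMPI_COMM_WORLD_LOCAL_RANK, OMPI_COMM_WORLD_SIZE), so a sketch of the call site looks almost identical; the script name and arguments are again placeholders:

    # train.py, started with e.g.:  mpirun -np 8 python train.py --host <node0-addr> --port 29500
    import colossalai

    args = colossalai.get_default_parser().parse_args()
    colossalai.launch_from_openmpi(config=args.config, host=args.host, port=args.port)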
@@ -187,20 +193,18 @@ def launch_from_torch(config: Union[str, Path, Config, Dict],
                       backend: str = 'nccl',
                       seed: int = 1024,
                       verbose: bool = True):
-    '''A wrapper for colossalai.launch for torchrun or torch.distributed.launch by reading rank and world size
+    """A wrapper for colossalai.launch for torchrun or torch.distributed.launch by reading rank and world size
     from the environment variables set by PyTorch

-    :param config: config file or config file path are both acceptable
+    :param config: Config file or config file path are both acceptable
     :type config: Union[str, dict, Config]
-    :param host: the master address for distributed training
-    :type host: str
-    :param port: the master port for distributed training
-    :type port: str
-    :param backend: backend for torch.distributed
-    :type backend: str
-    :param verbose: whether to print logs
-    :type verbose: bool
-    '''
+    :param backend: Backend for torch.distributed
+    :type backend: str, optional
+    :param seed: Specified random seed for every processes
+    :type seed: int, optional
+    :param verbose: Whether to print logs
+    :type verbose: bool, optional
+    """
     rank = int(os.environ['RANK'])
     local_rank = int(os.environ['LOCAL_RANK'])
     world_size = int(os.environ['WORLD_SIZE'])
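For the PyTorch launcher, a minimal sketch assuming the script is started with torchrun (or python -m torch.distributed.launch), which exports the RANK, LOCAL_RANK and WORLD_SIZE variables read above; the file names are placeholders, and only the parameters listed in the updated docstring are passed since the exact set of required arguments may vary across versions:

    # train.py, started with e.g.:  torchrun --nproc_per_node=8 train.py --config config.py
    import colossalai

    args = colossalai.get_default_parser().parse_args()
    # Rank information comes from the environment set by torchrun.
    colossalai.launch_from_torch(config=args.config)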
@@ -225,25 +229,26 @@ def initialize(model: Union[nn.Module, List[nn.Module]],
                lr_scheduler: _LRScheduler = None,
                verbose: bool = True
                ) -> Tuple[Engine, DataLoader, DataLoader, _LRScheduler]:
-    ''' Core function to wrap the essential training components with our functionality based on the config which is loaded into gpc.config.
+    """Core function to wrap the essential training components with our functionality based on the config which is
+    loaded into gpc.config.

-    :param model: your model instance
+    :param model: Your model instance
     :type model: :class:`torch.nn.Module`
-    :param optimizer: your optimizer instance
+    :param optimizer: Your optimizer instance
     :type optimizer: :class:`torch.optim.optimizer.Optimizer`
-    :param criterion: your criterion instance
+    :param criterion: Your criterion instance
     :type criterion: :class:`torch.nn.modules.loss._Loss`
-    :param train_dataloader: dataloader for training data
-    :type train_dataloader: :class:`torch.utils.data.DataLoader`
-    :param train_dataloader: dataloader for testing data
-    :type train_dataloader: :class:`torch.utils.data.DataLoader`
-    :param lr_scheduler: your lr scheduler instance
-    :type lr_scheduler: :class:`torch.nn.lr_scheduler._LRScheduler`
-    :param verbose: whether to print logs
-    :type verbose: bool
+    :param train_dataloader: Dataloader for training
+    :type train_dataloader: :class:`torch.utils.data.DataLoader`, optional
+    :param test_dataloader: Dataloader for testing
+    :type test_dataloader: :class:`torch.utils.data.DataLoader`, optional
+    :param lr_scheduler: Your lr scheduler instance
+    :type lr_scheduler: :class:`torch.nn.lr_scheduler._LRScheduler`, optional
+    :param verbose: Whether to print logs
+    :type verbose: bool, optional
     :return: (engine, train_dataloader, test_dataloader, lr_scheduler)
-    :rtype: tuple
-    '''
+    :rtype: Tuple
+    """
     # get logger
     logger = get_dist_logger()
     gpc.verbose = verbose
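To show how the initialize signature above is consumed, a hedged sketch that wraps a toy setup; the model, optimizer, criterion and dataloader are illustrative stand-ins, the config path is a placeholder, and the distributed environment is assumed to have been set up by one of the launch helpers first:

    import colossalai
    import torch
    import torch.nn as nn
    from torch.utils.data import DataLoader, TensorDataset

    colossalai.launch_from_torch(config='./config.py')  # placeholder config path

    # Illustrative components; any nn.Module / Optimizer / _Loss / DataLoader would do.
    model = nn.Linear(32, 10)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    criterion = nn.CrossEntropyLoss()
    train_loader = DataLoader(TensorDataset(torch.randn(256, 32),
                                            torch.randint(0, 10, (256,))),
                              batch_size=64)

    # Returns the tuple documented above: (engine, train_dataloader, test_dataloader, lr_scheduler).
    engine, train_dataloader, test_dataloader, lr_scheduler = colossalai.initialize(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        train_dataloader=train_loader)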