update examples and sphinx docs for the new api (#63)

This commit is contained in:
Frank Lee
2021-12-13 22:07:01 +08:00
committed by GitHub
parent 7d3711058f
commit 35813ed3c4
124 changed files with 1251 additions and 1462 deletions

View File

@@ -77,10 +77,10 @@ fp16 = dict(
)
```
## Tensor Parallel AMP
## Naive AMP
We leveraged the Megatron-LM implementation to achieve mixed precision training while maintaining compatibility with complex tensor
and pipeline parallelism.
and pipeline parallelism. This AMP mode casts all operations to fp16.
The following code block shows a config file for this mode.
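For reference, a minimal config sketch for this mode might look like the one below (assuming `AMP_TYPE.NAIVE` selects this AMP flavor, consistent with the `AMP_TYPE` usage elsewhere in these docs; check the `colossalai.amp` API reference for the exact options):
```python
from colossalai.amp import AMP_TYPE

fp16 = dict(
    mode=AMP_TYPE.NAIVE  # cast all operations to fp16
)
```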

View File

@@ -0,0 +1,5 @@
colossalai.amp.apex\_amp
==========================
.. automodule:: colossalai.amp.apex_amp
:members:

View File

@@ -0,0 +1,5 @@
colossalai.amp.naive\_amp
==========================
.. automodule:: colossalai.amp.naive_amp
:members:

View File

@@ -0,0 +1,13 @@
colossalai.amp
==================
.. toctree::
:maxdepth: 2
colossalai.amp.torch_amp
colossalai.amp.apex_amp
colossalai.amp.naive_amp
.. automodule:: colossalai.amp
:members:

View File

@@ -0,0 +1,5 @@
colossalai.amp.torch\_amp
==========================
.. automodule:: colossalai.amp.torch_amp
:members:

View File

@@ -1,12 +1,12 @@
colossalai.builder
==================
.. automodule:: colossalai.builder
:members:
.. toctree::
:maxdepth: 2
colossalai.builder.builder
colossalai.builder.pipeline
.. automodule:: colossalai.builder
:members:

View File

@@ -1,5 +0,0 @@
colossalai.checkpointing
========================
.. automodule:: colossalai.checkpointing
:members:

View File

@@ -1,10 +1,6 @@
colossalai.communication
========================
.. automodule:: colossalai.communication
:members:
.. toctree::
:maxdepth: 2
@@ -12,3 +8,7 @@ colossalai.communication
colossalai.communication.p2p
colossalai.communication.ring
colossalai.communication.utils
.. automodule:: colossalai.communication
:members:

View File

@@ -1,11 +1,11 @@
colossalai.context.random
=========================
.. automodule:: colossalai.context.random
:members:
.. toctree::
:maxdepth: 2
colossalai.context.random.seed_manager
.. automodule:: colossalai.context.random
:members:

View File

@@ -1,9 +1,6 @@
colossalai.context
==================
.. automodule:: colossalai.context
:members:
.. toctree::
:maxdepth: 2
@@ -17,3 +14,7 @@ colossalai.context
colossalai.context.config
colossalai.context.parallel_context
colossalai.context.parallel_mode
.. automodule:: colossalai.context
:members:

View File

@@ -1,5 +0,0 @@
colossalai.engine.amp.amp\_type
===============================
.. automodule:: colossalai.engine.amp.amp_type
:members:

View File

@@ -1,5 +0,0 @@
colossalai.engine.amp.grad\_scaler
==================================
.. automodule:: colossalai.engine.amp.grad_scaler
:members:

View File

@@ -1,12 +0,0 @@
colossalai.engine.amp
=====================
.. automodule:: colossalai.engine.amp
:members:
.. toctree::
:maxdepth: 2
colossalai.engine.amp.amp_type
colossalai.engine.amp.grad_scaler

View File

@@ -1,12 +1,12 @@
colossalai.engine
=================
.. automodule:: colossalai.engine
:members:
.. toctree::
:maxdepth: 2
colossalai.engine.amp
colossalai.engine.gradient_handler
colossalai.engine.schedule
.. automodule:: colossalai.engine
:members:

View File

@@ -1,11 +1,11 @@
colossalai.logging
==================
.. automodule:: colossalai.logging
:members:
.. toctree::
:maxdepth: 2
colossalai.logging.logging
.. automodule:: colossalai.logging
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.data.base\_dataset
================================
.. automodule:: colossalai.nn.data.base_dataset
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.data.caltech101\_dataset
======================================
.. automodule:: colossalai.nn.data.caltech101_dataset
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.data.cifar10\_dataset
===================================
.. automodule:: colossalai.nn.data.cifar10_dataset
:members:

View File

@@ -1,18 +0,0 @@
colossalai.nn.data
==================
.. automodule:: colossalai.nn.data
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.data.sampler
.. toctree::
:maxdepth: 2
colossalai.nn.data.base_dataset
colossalai.nn.data.caltech101_dataset
colossalai.nn.data.cifar10_dataset

View File

@@ -1,5 +0,0 @@
colossalai.nn.data.sampler.base\_sampler
========================================
.. automodule:: colossalai.nn.data.sampler.base_sampler
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.data.sampler.data\_parallel\_sampler
==================================================
.. automodule:: colossalai.nn.data.sampler.data_parallel_sampler
:members:

View File

@@ -1,12 +0,0 @@
colossalai.nn.data.sampler
==========================
.. automodule:: colossalai.nn.data.sampler
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.data.sampler.base_sampler
colossalai.nn.data.sampler.data_parallel_sampler

View File

@@ -0,0 +1,5 @@
colossalai.nn.layer.non\_parallel\_layers
==========================================
.. automodule:: colossalai.nn.layer.non_parallel_layers
:members:

View File

@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_1d
================================
.. automodule:: colossalai.nn.layer.parallel_1d
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.layer.parallel_1d.layers
.. automodule:: colossalai.nn.layer.parallel_1d
:members:

View File

@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_2d
================================
.. automodule:: colossalai.nn.layer.parallel_2d
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.layer.parallel_2d.layers
.. automodule:: colossalai.nn.layer.parallel_2d
:members:

View File

@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_2p5d
==================================
.. automodule:: colossalai.nn.layer.parallel_2p5d
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.layer.parallel_2p5d.layers
.. automodule:: colossalai.nn.layer.parallel_2p5d
:members:

View File

@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_3d
================================
.. automodule:: colossalai.nn.layer.parallel_3d
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.layer.parallel_3d.layers
.. automodule:: colossalai.nn.layer.parallel_3d
:members:

View File

@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_sequence
======================================
.. automodule:: colossalai.nn.layer.parallel_sequence
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.layer.parallel_sequence.layers
.. automodule:: colossalai.nn.layer.parallel_sequence
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.layer.parallel\_vision\_transformer.layers
========================================================
.. automodule:: colossalai.nn.layer.parallel_vision_transformer.layers
:members:

View File

@@ -1,11 +0,0 @@
colossalai.nn.layer.parallel\_vision\_transformer
=================================================
.. automodule:: colossalai.nn.layer.parallel_vision_transformer
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.layer.parallel_vision_transformer.layers

View File

@@ -1,9 +1,6 @@
colossalai.nn.layer
===================
.. automodule:: colossalai.nn.layer
:members:
.. toctree::
:maxdepth: 2
@@ -12,13 +9,10 @@ colossalai.nn.layer
colossalai.nn.layer.parallel_2p5d
colossalai.nn.layer.parallel_3d
colossalai.nn.layer.parallel_sequence
colossalai.nn.layer.parallel_vision_transformer
colossalai.nn.layer.vanilla_resnet
colossalai.nn.layer.vanilla_vision_transformer
colossalai.nn.layer.non_parallel_layers
colossalai.nn.layer.wrapper
.. toctree::
:maxdepth: 2
colossalai.nn.layer.base_layer
.. automodule:: colossalai.nn.layer
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_resnet.basic\_block
================================================
.. automodule:: colossalai.nn.layer.vanilla_resnet.basic_block
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_resnet.bottleneck
==============================================
.. automodule:: colossalai.nn.layer.vanilla_resnet.bottleneck
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_resnet.conv
========================================
.. automodule:: colossalai.nn.layer.vanilla_resnet.conv
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_resnet.reslayer
============================================
.. automodule:: colossalai.nn.layer.vanilla_resnet.reslayer
:members:

View File

@@ -1,14 +0,0 @@
colossalai.nn.layer.vanilla\_resnet
===================================
.. automodule:: colossalai.nn.layer.vanilla_resnet
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.layer.vanilla_resnet.basic_block
colossalai.nn.layer.vanilla_resnet.bottleneck
colossalai.nn.layer.vanilla_resnet.conv
colossalai.nn.layer.vanilla_resnet.reslayer

View File

@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_vision\_transformer.layers
=======================================================
.. automodule:: colossalai.nn.layer.vanilla_vision_transformer.layers
:members:

View File

@@ -1,11 +0,0 @@
colossalai.nn.layer.vanilla\_vision\_transformer
================================================
.. automodule:: colossalai.nn.layer.vanilla_vision_transformer
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.layer.vanilla_vision_transformer.layers

View File

@@ -1,5 +0,0 @@
colossalai.nn.loss.base\_loss
=============================
.. automodule:: colossalai.nn.loss.base_loss
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.loss.cross\_entropy\_1d
=====================================
.. automodule:: colossalai.nn.loss.cross_entropy_1d
:members:

View File

@@ -1,15 +1,13 @@
colossalai.nn.loss
==================
.. automodule:: colossalai.nn.loss
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.loss.base_loss
colossalai.nn.loss.cross_entropy_1d
colossalai.nn.loss.cross_entropy_2d
colossalai.nn.loss.cross_entropy_2p5d
colossalai.nn.loss.cross_entropy_3d
.. automodule:: colossalai.nn.loss
:members:

View File

@@ -1,10 +1,6 @@
colossalai.nn.lr\_scheduler
===========================
.. automodule:: colossalai.nn.lr_scheduler
:members:
.. toctree::
:maxdepth: 2
@@ -15,3 +11,7 @@ colossalai.nn.lr\_scheduler
colossalai.nn.lr_scheduler.onecycle
colossalai.nn.lr_scheduler.poly
colossalai.nn.lr_scheduler.torch
.. automodule:: colossalai.nn.lr_scheduler
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.model.base\_model
===============================
.. automodule:: colossalai.nn.model.base_model
:members:

View File

@@ -0,0 +1,5 @@
colossalai.nn.model.model\_from\_config
=======================================
.. automodule:: colossalai.nn.model.model_from_config
:members:

View File

@@ -1,17 +1,7 @@
colossalai.nn.model
===================
.. automodule:: colossalai.nn.model
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.model.vanilla_resnet
colossalai.nn.model.vision_transformer
.. toctree::
:maxdepth: 2
colossalai.nn.model.base_model
colossalai.nn.model.model_from_config

View File

@@ -1,5 +0,0 @@
colossalai.nn.model.vanilla\_resnet.resnet
==========================================
.. automodule:: colossalai.nn.model.vanilla_resnet.resnet
:members:

View File

@@ -1,11 +0,0 @@
colossalai.nn.model.vanilla\_resnet
===================================
.. automodule:: colossalai.nn.model.vanilla_resnet
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.model.vanilla_resnet.resnet

View File

@@ -1,11 +0,0 @@
colossalai.nn.model.vision\_transformer
=======================================
.. automodule:: colossalai.nn.model.vision_transformer
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.model.vision_transformer.vision_transformer

View File

@@ -1,5 +0,0 @@
colossalai.nn.model.vision\_transformer.vision\_transformer
===========================================================
.. automodule:: colossalai.nn.model.vision_transformer.vision_transformer
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.multi\_tensor\_apply.multi\_tensor\_apply
=======================================================
.. automodule:: colossalai.nn.multi_tensor_apply.multi_tensor_apply
:members:

View File

@@ -1,11 +0,0 @@
colossalai.nn.multi\_tensor\_apply
==================================
.. automodule:: colossalai.nn.multi_tensor_apply
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.multi_tensor_apply.multi_tensor_apply

View File

@@ -1,5 +0,0 @@
colossalai.nn.optimizer.fp16\_optimizer
=======================================
.. automodule:: colossalai.nn.optimizer.fp16_optimizer
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.optimizer.loss\_scaler
====================================
.. automodule:: colossalai.nn.optimizer.loss_scaler
:members:

View File

@@ -1,20 +1,15 @@
colossalai.nn.optimizer
=======================
.. automodule:: colossalai.nn.optimizer
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.optimizer.fp16_optimizer
colossalai.nn.optimizer.fused_adam
colossalai.nn.optimizer.fused_lamb
colossalai.nn.optimizer.fused_sgd
colossalai.nn.optimizer.lamb
colossalai.nn.optimizer.lars
colossalai.nn.optimizer.loss_scaler
colossalai.nn.optimizer.zero_redundancy_optimizer_level_1
colossalai.nn.optimizer.zero_redundancy_optimizer_level_2
colossalai.nn.optimizer.zero_redundancy_optimizer_level_3
.. automodule:: colossalai.nn.optimizer
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.optimizer.zero\_redundancy\_optimizer\_level\_1
=============================================================
.. automodule:: colossalai.nn.optimizer.zero_redundancy_optimizer_level_1
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.optimizer.zero\_redundancy\_optimizer\_level\_2
=============================================================
.. automodule:: colossalai.nn.optimizer.zero_redundancy_optimizer_level_2
:members:

View File

@@ -1,5 +0,0 @@
colossalai.nn.optimizer.zero\_redundancy\_optimizer\_level\_3
=============================================================
.. automodule:: colossalai.nn.optimizer.zero_redundancy_optimizer_level_3
:members:

View File

@@ -1,16 +1,15 @@
colossalai.nn
=============
.. automodule:: colossalai.nn
:members:
.. toctree::
:maxdepth: 2
colossalai.nn.data
colossalai.nn.layer
colossalai.nn.loss
colossalai.nn.lr_scheduler
colossalai.nn.model
colossalai.nn.multi_tensor_apply
colossalai.nn.optimizer
.. automodule:: colossalai.nn
:members:

View File

@@ -1,11 +1,11 @@
colossalai.registry
===================
.. automodule:: colossalai.registry
:members:
.. toctree::
:maxdepth: 2
colossalai.registry.registry
.. automodule:: colossalai.registry
:members:

View File

@@ -1,12 +1,18 @@
colossalai
==========
.. automodule:: colossalai
:members:
.. toctree::
:maxdepth: 2
colossalai.constants
colossalai.core
colossalai.initialize
.. toctree::
:maxdepth: 2
colossalai.amp
colossalai.builder
colossalai.communication
colossalai.context
@@ -16,11 +22,7 @@ colossalai
colossalai.registry
colossalai.trainer
colossalai.utils
colossalai.zero
.. toctree::
:maxdepth: 2
colossalai.constants
colossalai.core
colossalai.initialize
.. automodule:: colossalai
:members:

View File

@@ -1,9 +1,6 @@
colossalai.trainer
==================
.. automodule:: colossalai.trainer
:members:
.. toctree::
:maxdepth: 2
@@ -14,3 +11,7 @@ colossalai.trainer
:maxdepth: 2
colossalai.trainer.metric
.. automodule:: colossalai.trainer
:members:

View File

@@ -0,0 +1,5 @@
colossalai.utils.data\_sampler
=======================================
.. automodule:: colossalai.utils.data_sampler
:members:

View File

@@ -0,0 +1,5 @@
colossalai.utils.gradient\_accumulation
=======================================
.. automodule:: colossalai.utils.gradient_accumulation
:members:

View File

@@ -0,0 +1,8 @@
colossalai.utils.multi\_tensor\_apply
======================================
.. automodule:: colossalai.utils.multi_tensor_apply.multi_tensor_apply
:members:

View File

@@ -1,10 +1,6 @@
colossalai.utils
================
.. automodule:: colossalai.utils
:members:
.. toctree::
:maxdepth: 2
@@ -12,5 +8,12 @@ colossalai.utils
colossalai.utils.checkpointing
colossalai.utils.common
colossalai.utils.cuda
colossalai.utils.data_sampler
colossalai.utils.gradient_accumulation
colossalai.utils.memory
colossalai.utils.multi_tensor_apply
colossalai.utils.timer
.. automodule:: colossalai.utils
:members:

View File

@@ -0,0 +1,5 @@
colossalai.zero
================
.. automodule:: colossalai.zero
:members:

View File

@@ -18,6 +18,15 @@ fp16 = dict(
initial_scale=2 ** 8
)
# optional
# configuration for zero
# you can refer to the Zero Redundancy Optimizer and ZeRO Offload section for details
# https://www.colossalai.org/zero.html
zero = dict(
level=<int>,
...
)
# optional
# if you are using complex gradient handling
# otherwise, you do not need this in your config file

View File

@@ -1,15 +1,17 @@
# Setup
## Install with pip
### PyPI
```bash
pip install colossalai
```
## Install from source
### Install From Source (Recommended)
> We **recommend** installing from source, as Colossal-AI is updated frequently in these early versions. The documentation will be in line with the main branch of the repository. Feel free to raise an issue if you encounter any problems. :)
```shell
git clone git@github.com:hpcaitech/ColossalAI.git
git clone https://github.com/hpcaitech/ColossalAI.git
cd ColossalAI
# install dependency
pip install -r requirements/requirements.txt
@@ -22,8 +24,4 @@ Install and enable CUDA kernel fusion (compulsory installation when using fused
```shell
pip install -v --no-cache-dir --global-option="--cuda_ext" .
# install with editable enabled
pip install -v --no-cache-dir --global-option="--cuda_ext" -e .
```
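Once installed, you can run a quick sanity check (a minimal sketch; the command should exit without output on success):
```bash
# verify that the package imports correctly
python -c "import colossalai"
```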

View File

@@ -7,51 +7,92 @@ can also run on systems with only one GPU. Quick demos showing how to use Coloss
## Single GPU
Colossal-AI can be used to train deep learning models on systems with only one GPU and achieve baseline
performances. [Here](https://colab.research.google.com/drive/1fJnqqFzPuzZ_kn1lwCpG2nh3l2ths0KE?usp=sharing#scrollTo=cQ_y7lBG09LS)
is an example showing how to train a LeNet model on the CIFAR10 dataset using Colossal-AI.
performances. We provide an example of training ResNet on the CIFAR10 dataset with only one GPU. You can find this example in
`examples/resnet_cifar10_data_parallel` in the repository. Detailed instructions can be found in its `README.md`.
## Multiple GPUs
Colossal-AI can be used to train deep learning models on distributed systems with multiple GPUs and accelerate the
training process drastically by applying efficient parallelization techniques, which will be elaborated in
the [Parallelization](parallelization.md) section below. Run the code below on your distributed system with 4 GPUs,
where `HOST` is the IP address of your system. Note that we use
the [Slurm](https://slurm.schedmd.com/documentation.html) job scheduling system here.
the [Parallelization](parallelization.md) section below.
```bash
HOST=xxx.xxx.xxx.xxx srun ./scripts/slurm_dist_train.sh ./examples/run_trainer.py ./configs/vit/vit_2d.py
```
You can turn the ResNet example mentioned above into multi-GPU training by setting `--nproc_per_node` to the number of
GPUs on your system, as sketched below. We also provide a Vision Transformer example which relies on
training with more GPUs. You can find it in `examples/vit_b16_imagenet_data_parallel`; it likewise comes with a detailed
`README.md`.
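A launch command might look like the following sketch (the entry script name `train.py` and the port are assumptions here; see the example's `README.md` for the exact invocation):
```bash
# spawn one worker process per GPU on a single 4-GPU node
python -m torch.distributed.launch --nproc_per_node 4 \
    train.py --host localhost --port 29500
```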
`./configs/vit/vit_2d.py` is a config file, which is introduced in the [Config file](config.md) section below. These
config files are used by Colossal-AI to define all kinds of training arguments, such as the model, dataset and training
method (optimizer, lr_scheduler, epoch, etc.). Config files are highly customizable and can be modified so as to train
different models.
`./examples/run_trainer.py` contains a standard training script and is presented below, it reads the config file and
realizes the training process.
## Sample Training Script
Below is a typical way to train your model with Colossal-AI:
```python
import colossalai
from colossalai.core import global_context as gpc
from colossalai.amp import AMP_TYPE
from colossalai.logging import get_dist_logger
from colossalai.trainer import Trainer
from colossalai.trainer import Trainer, hooks
from colossalai.utils import get_dataloader
CONFIG = dict(
parallel=dict(
pipeline=1,
tensor=1, mode=None
),
fp16 = dict(
mode=AMP_TYPE.TORCH
),
gradient_accumulation=4,
clip_grad_norm=1.0
)
def run_trainer():
engine, train_dataloader, test_dataloader = colossalai.initialize()
parser = colossalai.get_default_parser()
args = parser.parse_args()
colossalai.launch(config=CONFIG,
rank=args.rank,
world_size=args.world_size,
host=args.host,
port=args.port,
backend=args.backend)
logger = get_dist_logger()
logger.info("engine is built", ranks=[0])
# instantiate your components
model = MyModel()
optimizer = MyOptimizer(model.parameters(), ...)
criterion = MyCriterion()  # placeholder loss; passed to colossalai.initialize() below
train_dataset = TrainDataset()
test_dataset = TestDataset()
train_dataloader = get_dataloader(train_dataset, ...)
test_dataloader = get_dataloader(test_dataset, ...)
lr_scheduler = MyScheduler()
logger.info("components are built")
engine, train_dataloader, test_dataloader, lr_scheduler = colossalai.initialize(model,
optimizer,
criterion,
train_dataloader,
test_dataloader,
lr_scheduler)
trainer = Trainer(engine=engine,
verbose=True)
logger.info("trainer is built", ranks=[0])
logger.info("start training", ranks=[0])
hook_list = [
hooks.LossHook(),
hooks.LRSchedulerHook(lr_scheduler=lr_scheduler, by_epoch=False),
hooks.AccuracyHook(),
hooks.TensorboardHook(log_dir='./tb_logs', ranks=[0]),
hooks.LogMetricByEpochHook(logger),
hooks.LogMemoryByEpochHook(logger),
hooks.SaveCheckpointHook(checkpoint_dir='./ckpt')
]
trainer.fit(
train_dataloader=train_dataloader,
test_dataloader=test_dataloader,
epochs=gpc.config.num_epochs,
hooks_cfg=gpc.config.hooks,
epochs=NUM_EPOCH,
hooks=hook_list,
display_progress=True,
test_interval=2
)
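# Note (sketch, assumption): if you start this script with
# torch.distributed.launch, colossalai.launch_from_torch(config=CONFIG)
# can typically replace the explicit colossalai.launch(...) call above,
# reading the rank and world size from the launcher's environment variables.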

View File

@@ -19,6 +19,7 @@ Below are a few examples of ZeRO-3 configurations.
### Example of ZeRO-3 Configurations
You can refer to the [DeepSpeed configuration](https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training) for details.
Here we use `Adam` as the initial optimizer.
1. Use ZeRO to partition the optimizer states, gradients (level 2), and parameters (level 3).
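For illustration, a level-3 configuration sketch might look like the following (the offload field names below are assumptions modeled on the DeepSpeed-style ZeRO options linked above, not taken verbatim from this document; consult that reference for the authoritative schema):
```python
zero = dict(
    # level 3: partition optimizer states, gradients and parameters
    level=3,
    # assumed field names, see the DeepSpeed ZeRO reference linked above
    offload_optimizer_config=dict(
        device='cpu',
        pin_memory=True
    ),
    offload_param_config=dict(
        device='cpu',
        pin_memory=True
    )
)
```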