Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-02 09:38:05 +00:00)
update examples and sphinx docs for the new api (#63)
@@ -77,10 +77,10 @@ fp16 = dict(
)
```

## Tensor Parallel AMP
## Naive AMP

We leveraged the Megatron-LM implementation to achieve mixed precision training while maintaining compatibility with complex tensor
and pipeline parallelism.
and pipeline parallelism. This AMP mode will cast all operations into fp16.

The following code block shows a config file for this mode.
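The referenced config block is cut off in this hunk. As a rough sketch only: the snippet below assumes the `AMP_TYPE.NAIVE` mode exposed by `colossalai.amp`, and borrows `initial_scale` from the fp16 example later in this commit; the exact options accepted by naive AMP may differ.

```python
from colossalai.amp import AMP_TYPE

# sketch of a naive AMP entry in a Colossal-AI config file;
# `initial_scale` is illustrative and mirrors the fp16 example shown elsewhere in this commit
fp16 = dict(
    mode=AMP_TYPE.NAIVE,
    initial_scale=2 ** 8
)
```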
docs/colossalai/colossalai.amp.apex_amp.rst (new file, 5 lines)
@@ -0,0 +1,5 @@
colossalai.amp.apex\_amp
==========================

.. automodule:: colossalai.amp.apex_amp
   :members:

docs/colossalai/colossalai.amp.naive_amp.rst (new file, 5 lines)
@@ -0,0 +1,5 @@
colossalai.amp.naive\_amp
==========================

.. automodule:: colossalai.amp.naive_amp
   :members:

docs/colossalai/colossalai.amp.rst (new file, 13 lines)
@@ -0,0 +1,13 @@
colossalai.amp
==================

.. toctree::
   :maxdepth: 2

   colossalai.amp.torch_amp
   colossalai.amp.apex_amp
   colossalai.amp.naive_amp


.. automodule:: colossalai.amp
   :members:

docs/colossalai/colossalai.amp.torch_amp.rst (new file, 5 lines)
@@ -0,0 +1,5 @@
colossalai.amp.torch\_amp
==========================

.. automodule:: colossalai.amp.torch_amp
   :members:
@@ -1,12 +1,12 @@
colossalai.builder
==================

.. automodule:: colossalai.builder
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.builder.builder
   colossalai.builder.pipeline


.. automodule:: colossalai.builder
   :members:
@@ -1,5 +0,0 @@
colossalai.checkpointing
========================

.. automodule:: colossalai.checkpointing
   :members:
@@ -1,10 +1,6 @@
colossalai.communication
========================

.. automodule:: colossalai.communication
   :members:


.. toctree::
   :maxdepth: 2

@@ -12,3 +8,7 @@ colossalai.communication
   colossalai.communication.p2p
   colossalai.communication.ring
   colossalai.communication.utils


.. automodule:: colossalai.communication
   :members:
@@ -1,11 +1,11 @@
colossalai.context.random
=========================

.. automodule:: colossalai.context.random
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.context.random.seed_manager


.. automodule:: colossalai.context.random
   :members:
@@ -1,9 +1,6 @@
colossalai.context
==================

.. automodule:: colossalai.context
   :members:

.. toctree::
   :maxdepth: 2

@@ -17,3 +14,7 @@ colossalai.context
   colossalai.context.config
   colossalai.context.parallel_context
   colossalai.context.parallel_mode


.. automodule:: colossalai.context
   :members:
@@ -1,5 +0,0 @@
colossalai.engine.amp.amp\_type
===============================

.. automodule:: colossalai.engine.amp.amp_type
   :members:

@@ -1,5 +0,0 @@
colossalai.engine.amp.grad\_scaler
==================================

.. automodule:: colossalai.engine.amp.grad_scaler
   :members:

@@ -1,12 +0,0 @@
colossalai.engine.amp
=====================

.. automodule:: colossalai.engine.amp
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.engine.amp.amp_type
   colossalai.engine.amp.grad_scaler
@@ -1,12 +1,12 @@
colossalai.engine
=================

.. automodule:: colossalai.engine
   :members:

.. toctree::
   :maxdepth: 2

   colossalai.engine.amp
   colossalai.engine.gradient_handler
   colossalai.engine.schedule


.. automodule:: colossalai.engine
   :members:
@@ -1,11 +1,11 @@
colossalai.logging
==================

.. automodule:: colossalai.logging
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.logging.logging


.. automodule:: colossalai.logging
   :members:
@@ -1,5 +0,0 @@
colossalai.nn.data.base\_dataset
================================

.. automodule:: colossalai.nn.data.base_dataset
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.data.caltech101\_dataset
======================================

.. automodule:: colossalai.nn.data.caltech101_dataset
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.data.cifar10\_dataset
===================================

.. automodule:: colossalai.nn.data.cifar10_dataset
   :members:
@@ -1,18 +0,0 @@
colossalai.nn.data
==================

.. automodule:: colossalai.nn.data
   :members:

.. toctree::
   :maxdepth: 2

   colossalai.nn.data.sampler


.. toctree::
   :maxdepth: 2

   colossalai.nn.data.base_dataset
   colossalai.nn.data.caltech101_dataset
   colossalai.nn.data.cifar10_dataset
@@ -1,5 +0,0 @@
colossalai.nn.data.sampler.base\_sampler
========================================

.. automodule:: colossalai.nn.data.sampler.base_sampler
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.data.sampler.data\_parallel\_sampler
==================================================

.. automodule:: colossalai.nn.data.sampler.data_parallel_sampler
   :members:

@@ -1,12 +0,0 @@
colossalai.nn.data.sampler
==========================

.. automodule:: colossalai.nn.data.sampler
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.data.sampler.base_sampler
   colossalai.nn.data.sampler.data_parallel_sampler
@@ -0,0 +1,5 @@
colossalai.nn.layer.non\_parallel\_layers
======================================

.. automodule:: colossalai.nn.layer.non_parallel_layers
   :members:
@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_1d
================================

.. automodule:: colossalai.nn.layer.parallel_1d
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.layer.parallel_1d.layers


.. automodule:: colossalai.nn.layer.parallel_1d
   :members:

@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_2d
================================

.. automodule:: colossalai.nn.layer.parallel_2d
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.layer.parallel_2d.layers


.. automodule:: colossalai.nn.layer.parallel_2d
   :members:

@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_2p5d
==================================

.. automodule:: colossalai.nn.layer.parallel_2p5d
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.layer.parallel_2p5d.layers


.. automodule:: colossalai.nn.layer.parallel_2p5d
   :members:

@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_3d
================================

.. automodule:: colossalai.nn.layer.parallel_3d
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.layer.parallel_3d.layers


.. automodule:: colossalai.nn.layer.parallel_3d
   :members:

@@ -1,11 +1,11 @@
colossalai.nn.layer.parallel\_sequence
======================================

.. automodule:: colossalai.nn.layer.parallel_sequence
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.layer.parallel_sequence.layers


.. automodule:: colossalai.nn.layer.parallel_sequence
   :members:
@@ -1,5 +0,0 @@
colossalai.nn.layer.parallel\_vision\_transformer.layers
========================================================

.. automodule:: colossalai.nn.layer.parallel_vision_transformer.layers
   :members:

@@ -1,11 +0,0 @@
colossalai.nn.layer.parallel\_vision\_transformer
=================================================

.. automodule:: colossalai.nn.layer.parallel_vision_transformer
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.layer.parallel_vision_transformer.layers
@@ -1,9 +1,6 @@
colossalai.nn.layer
===================

.. automodule:: colossalai.nn.layer
   :members:

.. toctree::
   :maxdepth: 2

@@ -12,13 +9,10 @@ colossalai.nn.layer
   colossalai.nn.layer.parallel_2p5d
   colossalai.nn.layer.parallel_3d
   colossalai.nn.layer.parallel_sequence
   colossalai.nn.layer.parallel_vision_transformer
   colossalai.nn.layer.vanilla_resnet
   colossalai.nn.layer.vanilla_vision_transformer
   colossalai.nn.layer.non_parallel_layers
   colossalai.nn.layer.wrapper


.. toctree::
   :maxdepth: 2

   colossalai.nn.layer.base_layer


.. automodule:: colossalai.nn.layer
   :members:
@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_resnet.basic\_block
================================================

.. automodule:: colossalai.nn.layer.vanilla_resnet.basic_block
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_resnet.bottleneck
==============================================

.. automodule:: colossalai.nn.layer.vanilla_resnet.bottleneck
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_resnet.conv
========================================

.. automodule:: colossalai.nn.layer.vanilla_resnet.conv
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_resnet.reslayer
============================================

.. automodule:: colossalai.nn.layer.vanilla_resnet.reslayer
   :members:

@@ -1,14 +0,0 @@
colossalai.nn.layer.vanilla\_resnet
===================================

.. automodule:: colossalai.nn.layer.vanilla_resnet
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.layer.vanilla_resnet.basic_block
   colossalai.nn.layer.vanilla_resnet.bottleneck
   colossalai.nn.layer.vanilla_resnet.conv
   colossalai.nn.layer.vanilla_resnet.reslayer
@@ -1,5 +0,0 @@
colossalai.nn.layer.vanilla\_vision\_transformer.layers
=======================================================

.. automodule:: colossalai.nn.layer.vanilla_vision_transformer.layers
   :members:

@@ -1,11 +0,0 @@
colossalai.nn.layer.vanilla\_vision\_transformer
================================================

.. automodule:: colossalai.nn.layer.vanilla_vision_transformer
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.layer.vanilla_vision_transformer.layers
@@ -1,5 +0,0 @@
colossalai.nn.loss.base\_loss
=============================

.. automodule:: colossalai.nn.loss.base_loss
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.loss.cross\_entropy\_1d
=====================================

.. automodule:: colossalai.nn.loss.cross_entropy_1d
   :members:
@@ -1,15 +1,13 @@
colossalai.nn.loss
==================

.. automodule:: colossalai.nn.loss
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.loss.base_loss
   colossalai.nn.loss.cross_entropy_1d
   colossalai.nn.loss.cross_entropy_2d
   colossalai.nn.loss.cross_entropy_2p5d
   colossalai.nn.loss.cross_entropy_3d


.. automodule:: colossalai.nn.loss
   :members:
@@ -1,10 +1,6 @@
colossalai.nn.lr\_scheduler
===========================

.. automodule:: colossalai.nn.lr_scheduler
   :members:


.. toctree::
   :maxdepth: 2

@@ -15,3 +11,7 @@ colossalai.nn.lr\_scheduler
   colossalai.nn.lr_scheduler.onecycle
   colossalai.nn.lr_scheduler.poly
   colossalai.nn.lr_scheduler.torch


.. automodule:: colossalai.nn.lr_scheduler
   :members:
@@ -1,5 +0,0 @@
colossalai.nn.model.base\_model
===============================

.. automodule:: colossalai.nn.model.base_model
   :members:

@@ -0,0 +1,5 @@
colossalai.nn.model.model\_from\_config
===============================

.. automodule:: colossalai.nn.model.model_from_config
   :members:
@@ -1,17 +1,7 @@
colossalai.nn.model
===================

.. automodule:: colossalai.nn.model
   :members:

.. toctree::
   :maxdepth: 2

   colossalai.nn.model.vanilla_resnet
   colossalai.nn.model.vision_transformer


.. toctree::
   :maxdepth: 2

   colossalai.nn.model.base_model
   colossalai.nn.model.model_from_config
@@ -1,5 +0,0 @@
colossalai.nn.model.vanilla\_resnet.resnet
==========================================

.. automodule:: colossalai.nn.model.vanilla_resnet.resnet
   :members:

@@ -1,11 +0,0 @@
colossalai.nn.model.vanilla\_resnet
===================================

.. automodule:: colossalai.nn.model.vanilla_resnet
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.model.vanilla_resnet.resnet

@@ -1,11 +0,0 @@
colossalai.nn.model.vision\_transformer
=======================================

.. automodule:: colossalai.nn.model.vision_transformer
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.model.vision_transformer.vision_transformer

@@ -1,5 +0,0 @@
colossalai.nn.model.vision\_transformer.vision\_transformer
===========================================================

.. automodule:: colossalai.nn.model.vision_transformer.vision_transformer
   :members:
@@ -1,5 +0,0 @@
colossalai.nn.multi\_tensor\_apply.multi\_tensor\_apply
=======================================================

.. automodule:: colossalai.nn.multi_tensor_apply.multi_tensor_apply
   :members:

@@ -1,11 +0,0 @@
colossalai.nn.multi\_tensor\_apply
==================================

.. automodule:: colossalai.nn.multi_tensor_apply
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.multi_tensor_apply.multi_tensor_apply
@@ -1,5 +0,0 @@
colossalai.nn.optimizer.fp16\_optimizer
=======================================

.. automodule:: colossalai.nn.optimizer.fp16_optimizer
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.optimizer.loss\_scaler
====================================

.. automodule:: colossalai.nn.optimizer.loss_scaler
   :members:
@@ -1,20 +1,15 @@
colossalai.nn.optimizer
=======================

.. automodule:: colossalai.nn.optimizer
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.nn.optimizer.fp16_optimizer
   colossalai.nn.optimizer.fused_adam
   colossalai.nn.optimizer.fused_lamb
   colossalai.nn.optimizer.fused_sgd
   colossalai.nn.optimizer.lamb
   colossalai.nn.optimizer.lars
   colossalai.nn.optimizer.loss_scaler
   colossalai.nn.optimizer.zero_redundancy_optimizer_level_1
   colossalai.nn.optimizer.zero_redundancy_optimizer_level_2
   colossalai.nn.optimizer.zero_redundancy_optimizer_level_3


.. automodule:: colossalai.nn.optimizer
   :members:
@@ -1,5 +0,0 @@
colossalai.nn.optimizer.zero\_redundancy\_optimizer\_level\_1
=============================================================

.. automodule:: colossalai.nn.optimizer.zero_redundancy_optimizer_level_1
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.optimizer.zero\_redundancy\_optimizer\_level\_2
=============================================================

.. automodule:: colossalai.nn.optimizer.zero_redundancy_optimizer_level_2
   :members:

@@ -1,5 +0,0 @@
colossalai.nn.optimizer.zero\_redundancy\_optimizer\_level\_3
=============================================================

.. automodule:: colossalai.nn.optimizer.zero_redundancy_optimizer_level_3
   :members:
@@ -1,16 +1,15 @@
colossalai.nn
=============

.. automodule:: colossalai.nn
   :members:

.. toctree::
   :maxdepth: 2

   colossalai.nn.data
   colossalai.nn.layer
   colossalai.nn.loss
   colossalai.nn.lr_scheduler
   colossalai.nn.model
   colossalai.nn.multi_tensor_apply
   colossalai.nn.optimizer


.. automodule:: colossalai.nn
   :members:
@@ -1,11 +1,11 @@
colossalai.registry
===================

.. automodule:: colossalai.registry
   :members:


.. toctree::
   :maxdepth: 2

   colossalai.registry.registry


.. automodule:: colossalai.registry
   :members:
@@ -1,12 +1,18 @@
colossalai
==========

.. automodule:: colossalai
   :members:

.. toctree::
   :maxdepth: 2

   colossalai.constants
   colossalai.core
   colossalai.initialize


.. toctree::
   :maxdepth: 2

   colossalai.amp
   colossalai.builder
   colossalai.communication
   colossalai.context

@@ -16,11 +22,7 @@ colossalai
   colossalai.registry
   colossalai.trainer
   colossalai.utils
   colossalai.zero


.. toctree::
   :maxdepth: 2

   colossalai.constants
   colossalai.core
   colossalai.initialize

.. automodule:: colossalai
   :members:
@@ -1,9 +1,6 @@
colossalai.trainer
==================

.. automodule:: colossalai.trainer
   :members:

.. toctree::
   :maxdepth: 2

@@ -14,3 +11,7 @@ colossalai.trainer
   :maxdepth: 2

   colossalai.trainer.metric


.. automodule:: colossalai.trainer
   :members:
docs/colossalai/colossalai.utils.data_sampler.rst (new file, 5 lines)
@@ -0,0 +1,5 @@
colossalai.utils.data\_sampler
=======================================

.. automodule:: colossalai.utils.data_sampler
   :members:

@@ -0,0 +1,5 @@
colossalai.utils.gradient\_accumulation
=======================================

.. automodule:: colossalai.utils.gradient_accumulation
   :members:

docs/colossalai/colossalai.utils.multi_tensor_apply.rst (new file, 8 lines)
@@ -0,0 +1,8 @@
colossalai.nn.multi\_tensor\_apply
==================================

.. automodule:: colossalai.utils.multi_tensor_apply.multi_tensor_apply
   :members:
@@ -1,10 +1,6 @@
colossalai.utils
================

.. automodule:: colossalai.utils
   :members:


.. toctree::
   :maxdepth: 2

@@ -12,5 +8,12 @@ colossalai.utils
   colossalai.utils.checkpointing
   colossalai.utils.common
   colossalai.utils.cuda
   colossalai.utils.data_sampler
   colossalai.utils.gradient_accumulation
   colossalai.utils.memory
   colossalai.utils.multi_tensor_apply
   colossalai.utils.timer


.. automodule:: colossalai.utils
   :members:
docs/colossalai/colossalai.zero.rst (new file, 5 lines)
@@ -0,0 +1,5 @@
colossalai.zero
================

.. automodule:: colossalai.zero
   :members:
@@ -18,6 +18,15 @@ fp16 = dict(
    initial_scale=2 ** 8
)

# optional
# configuration for zero
# you can refer to the Zero Redundancy optimizer and zero offload section for details
# https://www.colossalai.org/zero.html
zero = dict(
    level=<int>,
    ...
)

# optional
# if you are using complex gradient handling
# otherwise, you do not need this in your config file
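The gradient handling entry itself is not shown in this hunk. As a purely illustrative sketch (the `gradient_handler` key and the handler name below are assumptions for illustration, not taken from this commit), such an optional section might look like:

```python
# hypothetical gradient handler section; the key name and the handler type
# are placeholders -- check the gradient handler documentation for the real names
gradient_handler = [
    dict(type='MyGradientHandler'),
]
```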
@@ -1,15 +1,17 @@
# Setup

## Install with pip
### PyPI

```bash
pip install colossalai
```

## Install from source
### Install From Source (Recommended)

> We **recommend** installing from source, as Colossal-AI is updated frequently in these early versions. The documentation will be in line with the main branch of the repository. Feel free to raise an issue if you encounter any problem. :)

```shell
git clone git@github.com:hpcaitech/ColossalAI.git
git clone https://github.com/hpcaitech/ColossalAI.git
cd ColossalAI
# install dependency
pip install -r requirements/requirements.txt

@@ -22,8 +24,4 @@ Install and enable CUDA kernel fusion (compulsory installation when using fused

```shell
pip install -v --no-cache-dir --global-option="--cuda_ext" .

# install with editable enabled
pip install -v --no-cache-dir --global-option="--cuda_ext" -e .
```
@@ -7,51 +7,92 @@ can also run on systems with only one GPU. Quick demos showing how to use Coloss

## Single GPU

Colossal-AI can be used to train deep learning models on systems with only one GPU and achieve baseline
performances. [Here](https://colab.research.google.com/drive/1fJnqqFzPuzZ_kn1lwCpG2nh3l2ths0KE?usp=sharing#scrollTo=cQ_y7lBG09LS)
is an example showing how to train a LeNet model on the CIFAR10 dataset using Colossal-AI.
performances. We provide an example of training ResNet on the CIFAR10 dataset with only one GPU. You can find this example in
`examples/resnet_cifar10_data_parallel` in the repository. Detailed instructions can be found in its `README.md`.

## Multiple GPUs

Colossal-AI can be used to train deep learning models on distributed systems with multiple GPUs and accelerate the
training process drastically by applying efficient parallelization techniques, which will be elaborated in
the [Parallelization](parallelization.md) section below. Run the code below on your distributed system with 4 GPUs,
where `HOST` is the IP address of your system. Note that we use
the [Slurm](https://slurm.schedmd.com/documentation.html) job scheduling system here.
the [Parallelization](parallelization.md) section below.

```bash
HOST=xxx.xxx.xxx.xxx srun ./scripts/slurm_dist_train.sh ./examples/run_trainer.py ./configs/vit/vit_2d.py
```

You can turn the ResNet example mentioned above into multi-GPU training by setting `--nproc_per_node` to the number of
GPUs you have on your system. We also provide a Vision Transformer example which relies on
training with more GPUs. You can find this example in `examples/vit_b16_imagenet_data_parallel`; it also comes with a detailed
`README.md` with instructions.

`./configs/vit/vit_2d.py` is a config file, which is introduced in the [Config file](config.md) section below. These
config files are used by Colossal-AI to define all kinds of training arguments, such as the model, dataset and training
method (optimizer, lr_scheduler, epoch, etc.). Config files are highly customizable and can be modified so as to train
different models.
`./examples/run_trainer.py` contains a standard training script and is presented below; it reads the config file and
carries out the training process.
## Sample Training Script

Below is a typical example of how you can train your model with Colossal-AI.

```python
import colossalai
from colossalai.core import global_context as gpc
from colossalai.amp import AMP_TYPE
from colossalai.logging import get_dist_logger
from colossalai.trainer import Trainer
from colossalai.trainer import Trainer, hooks
from colossalai.utils import get_dataloader


CONFIG = dict(
    parallel=dict(
        pipeline=1,
        tensor=1, mode=None
    ),
    fp16=dict(
        mode=AMP_TYPE.TORCH
    ),
    gradient_accumulation=4,
    clip_grad_norm=1.0
)

def run_trainer():
    engine, train_dataloader, test_dataloader = colossalai.initialize()
    parser = colossalai.get_default_parser()
    args = parser.parse_args()
    colossalai.launch(config=CONFIG,
                      rank=args.rank,
                      world_size=args.world_size,
                      host=args.host,
                      port=args.port,
                      backend=args.backend)

    logger = get_dist_logger()

    logger.info("engine is built", ranks=[0])
    # instantiate your components
    model = MyModel()
    optimizer = MyOptimizer(model.parameters(), ...)
    train_dataset = TrainDataset()
    test_dataset = TestDataset()
    train_dataloader = get_dataloader(train_dataset, ...)
    test_dataloader = get_dataloader(test_dataset, ...)
    lr_scheduler = MyScheduler()
    logger.info("components are built")

    engine, train_dataloader, test_dataloader, lr_scheduler = colossalai.initialize(model,
                                                                                     optimizer,
                                                                                     criterion,
                                                                                     train_dataloader,
                                                                                     test_dataloader,
                                                                                     lr_scheduler)

    trainer = Trainer(engine=engine,
                      verbose=True)
    logger.info("trainer is built", ranks=[0])

    logger.info("start training", ranks=[0])
    hook_list = [
        hooks.LossHook(),
        hooks.LRSchedulerHook(lr_scheduler=lr_scheduler, by_epoch=False),
        hooks.AccuracyHook(),
        hooks.TensorboardHook(log_dir='./tb_logs', ranks=[0]),
        hooks.LogMetricByEpochHook(logger),
        hooks.LogMemoryByEpochHook(logger),
        hooks.SaveCheckpointHook(checkpoint_dir='./ckpt')
    ]

    trainer.fit(
        train_dataloader=train_dataloader,
        test_dataloader=test_dataloader,
        epochs=gpc.config.num_epochs,
        hooks_cfg=gpc.config.hooks,
        epochs=NUM_EPOCH,
        hooks=hook_list,
        display_progress=True,
        test_interval=2
    )
```
@@ -19,6 +19,7 @@ Below are a few examples of ZeRO-3 configurations.

### Example of ZeRO-3 Configurations

You can refer to the [DeepSpeed configuration](https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training) for details.
Here we use `Adam` as the initial optimizer.

1. Use ZeRO to partition the optimizer states, gradients (level 2), and parameters (level 3).
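The configuration itself is cut off in this hunk. As a rough sketch only, based on the `zero = dict(level=<int>, ...)` pattern shown earlier in this commit, a level-3 setting that partitions optimizer states, gradients, and parameters could be written as follows; any further ZeRO options are omitted here because they are not shown in this excerpt.

```python
# sketch of a Colossal-AI config entry enabling ZeRO level 3
# (optimizer-state, gradient, and parameter partitioning);
# only the `level` key is taken from this document -- other ZeRO
# options would be added to this dict as documented upstream
zero = dict(
    level=3
)
```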