Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-02 01:28:31 +00:00)
[legacy] move engine to legacy (#4560)
* [legacy] move engine to legacy
* [example] fix seq parallel example
* [example] fix seq parallel example
* [test] test gemini pluging hang
* [test] test gemini pluging hang
* [test] test gemini pluging hang
* [test] test gemini pluging hang
* [test] test gemini pluging hang
* [example] update seq parallel requirements
@@ -92,14 +92,14 @@ follow the steps below to create a new distributed initialization.
Gradient handlers are objects which execute the all-reduce operations on parameters' gradients. As different all-reduce
strategies may be executed for different kinds of parallelism, users can
-inherit `colossalai.engine.gradient_handler.BaseGradientHandler` to implement their strategies. Currently, the library
+inherit `colossalai.legacy.engine.gradient_handler.BaseGradientHandler` to implement their strategies. Currently, the library
uses the normal data parallel gradient handler which all-reduces the gradients across data parallel ranks. The data
parallel gradient handler is added to the engine automatically if data parallel is detected. You can add your own
gradient handler like below:

```python
from colossalai.registry import GRADIENT_HANDLER
-from colossalai.engine import BaseGradientHandler
+from colossalai.legacy.engine import BaseGradientHandler

@GRADIENT_HANDLER.register_module
class YourGradientHandler(BaseGradientHandler):
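As a point of reference, a complete handler built on the relocated base class could look like the sketch below. It assumes, following the built-in data parallel handler, that custom handlers override a `handle_gradient()` hook and that the wrapped model is reachable as `self._model`; the class name is made up for illustration.

```python
import torch.distributed as dist

from colossalai.registry import GRADIENT_HANDLER
from colossalai.legacy.engine import BaseGradientHandler


@GRADIENT_HANDLER.register_module
class NaiveAllReduceGradientHandler(BaseGradientHandler):
    """All-reduce and average every gradient across all ranks after backward."""

    def handle_gradient(self):
        # Assumption: BaseGradientHandler keeps the wrapped model in self._model.
        world_size = dist.get_world_size()
        for param in self._model.parameters():
            if param.grad is not None:
                dist.all_reduce(param.grad.data)
                param.grad.data.div_(world_size)
```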
@@ -121,4 +121,5 @@ gradient_handlers = [
Schedule entails how to execute a forward and backward pass. Currently, Colossal-AI provides pipeline and non-pipeline
schedules. If you want to modify how the forward and backward passes are executed, you can
-inherit `colossalai.engine.schedule.BaseSchedule` and implement the `forward_back_step` function.
+inherit `colossalai.legacy.engine.schedule.BaseSchedule` and implement the `forward_back_step` function.
+<!-- doc-test-command: echo -->
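As a rough illustration of this extension point, the sketch below subclasses the relocated `BaseSchedule`. The hook name and signature (`forward_backward_step`, which the prose above abbreviates as `forward_back_step`) and the engine calls are assumptions modeled on the built-in non-pipeline schedule, not a verbatim copy of it.

```python
from colossalai.legacy.engine.schedule import BaseSchedule


class MyNonPipelineSchedule(BaseSchedule):
    """Runs one plain forward/backward pass per step, without pipelining."""

    def forward_backward_step(self, engine, data_iter, forward_only, return_loss=True, return_output_label=True):
        # Fetch one (data, label) batch; the real base class also provides
        # helpers that move the batch to the current device.
        data, label = next(data_iter)
        output = engine(data)                     # forward pass through the wrapped model
        loss = engine.criterion(output, label) if return_loss else None
        if not forward_only:
            engine.backward(loss)                 # backward pass driven by the engine
        if return_output_label:
            return output, label, loss
        return None, None, loss
```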
@@ -39,7 +39,7 @@ from colossalai.amp import AMP_TYPE
from colossalai.builder.pipeline import partition_uniform
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
-from colossalai.engine.schedule import (InterleavedPipelineSchedule,
+from colossalai.legacy.engine.schedule import (InterleavedPipelineSchedule,
PipelineSchedule)
from colossalai.logging import disable_existing_loggers, get_dist_logger
from colossalai.nn.layer.wrapper import PipelineSharedModuleWrapper
@@ -35,7 +35,7 @@ import colossalai.nn as col_nn
import torch
import torch.nn as nn
from colossalai.builder import build_pipeline_model
-from colossalai.engine.schedule import (InterleavedPipelineSchedule,
+from colossalai.legacy.engine.schedule import (InterleavedPipelineSchedule,
PipelineSchedule)
from colossalai.logging import disable_existing_loggers, get_dist_logger
from colossalai.legacy.trainer import Trainer, hooks
@@ -415,7 +415,7 @@ def build_pipeline_vit(num_layers, num_chunks, device=torch.device('cuda'), **kw
#### Import modules
```python
-from colossalai.engine.schedule import (InterleavedPipelineSchedule,
+from colossalai.legacy.engine.schedule import (InterleavedPipelineSchedule,
PipelineSchedule)
from colossalai.utils import MultiTimer
import os
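For orientation, the two imported schedules are typically built from the number of microbatches, plus the number of model chunks for the interleaved variant; the constructor keyword names below are assumptions, and the constants are placeholders.

```python
from colossalai.legacy.engine.schedule import (InterleavedPipelineSchedule,
                                               PipelineSchedule)

NUM_MICRO_BATCHES = 4   # how many microbatches each global batch is split into
NUM_CHUNKS = 2          # model chunks held by each pipeline stage (interleaved only)

if NUM_CHUNKS == 1:
    # Plain pipeline schedule: one model chunk per stage.
    schedule = PipelineSchedule(num_microbatches=NUM_MICRO_BATCHES)
else:
    # Interleaved schedule: each stage holds several model chunks.
    schedule = InterleavedPipelineSchedule(num_microbatches=NUM_MICRO_BATCHES,
                                           num_model_chunks=NUM_CHUNKS)
```

The resulting schedule is then handed to the trainer or engine that drives the training loop, alongside utilities such as `MultiTimer`.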
@@ -29,7 +29,7 @@ To implement a customized gradient handler, you need to follow these steps.
```python
from colossalai.registry import GRADIENT_HANDLER
-from colossalai.engine.gradient_handler import BaseGradientHandler
+from colossalai.legacy.engine.gradient_handler import BaseGradientHandler


@GRADIENT_HANDLER.register_module
@@ -61,3 +61,4 @@ to demonstrate the use of gradient handler. In this example, we used `DataParall
```shell
python -m torch.distributed.launch --nproc_per_node 4 --master_addr localhost --master_port 29500 train_with_engine.py
```
+<!-- doc-test-command: echo -->
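As the hunk header above shows (`gradient_handler = [dict(type='MyGradientHandler')]`), a registered handler is typically enabled through the training config by its registered type name. A minimal, hypothetical config sketch:

```python
# config.py: hypothetical config consumed by train_with_engine.py
BATCH_SIZE = 128
NUM_EPOCHS = 10

# Select the handler registered via @GRADIENT_HANDLER.register_module;
# 'MyGradientHandler' must match the registered class name.
gradient_handler = [dict(type='MyGradientHandler')]
```

The engine is then expected to build the listed handlers from the `GRADIENT_HANDLER` registry and invoke them after each backward pass.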
@@ -81,14 +81,14 @@ Colossal-AI 为用户提供了一个全局 context,使他们能够轻松地管
## Gradient Handler

Gradient handlers are objects that perform all-reduce operations on parameters' gradients. Since different all-reduce strategies may be executed for different kinds of parallelism, users can inherit
-`colossalai.engine.gradient_handler.BaseGradientHandler` to implement their own strategy. Currently, Colossal-AI uses the normal data parallel gradient handler, which all-reduces gradients across data parallel ranks.
+`colossalai.legacy.engine.gradient_handler.BaseGradientHandler` to implement their own strategy. Currently, Colossal-AI uses the normal data parallel gradient handler, which all-reduces gradients across data parallel ranks.
If data parallelism is detected, the gradient handler is added to the engine automatically.

You can add your own gradient handler as shown below:

```python
from colossalai.registry import GRADIENT_HANDLER
-from colossalai.engine import BaseGradientHandler
+from colossalai.legacy.engine import BaseGradientHandler

@GRADIENT_HANDLER.register_module
class YourGradientHandler(BaseGradientHandler):
@@ -109,4 +109,5 @@ gradient_handlers = [
## Schedule

A schedule defines how the forward and backward passes are executed. Currently, Colossal-AI provides pipeline and non-pipeline schedules.
-If you want to modify how the forward and backward passes are executed, you can inherit `colossalai.engine.schedule.BaseSchedule` and implement the `forward_back_step` function.
+If you want to modify how the forward and backward passes are executed, you can inherit `colossalai.legacy.engine.schedule.BaseSchedule` and implement the `forward_back_step` function.
+<!-- doc-test-command: echo -->
@@ -39,7 +39,7 @@ from colossalai.amp import AMP_TYPE
from colossalai.builder.pipeline import partition_uniform
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
-from colossalai.engine.schedule import (InterleavedPipelineSchedule,
+from colossalai.legacy.engine.schedule import (InterleavedPipelineSchedule,
PipelineSchedule)
from colossalai.logging import disable_existing_loggers, get_dist_logger
from colossalai.nn.layer.wrapper import PipelineSharedModuleWrapper
@@ -33,7 +33,7 @@ import colossalai.nn as col_nn
import torch
import torch.nn as nn
from colossalai.builder import build_pipeline_model
-from colossalai.engine.schedule import (InterleavedPipelineSchedule,
+from colossalai.legacy.engine.schedule import (InterleavedPipelineSchedule,
PipelineSchedule)
from colossalai.logging import disable_existing_loggers, get_dist_logger
from colossalai.legacy.trainer import Trainer, hooks
@@ -380,7 +380,7 @@ def build_pipeline_vit(num_layers, num_chunks, device=torch.device('cuda'), **kw
#### Import modules
```python
-from colossalai.engine.schedule import (InterleavedPipelineSchedule,
+from colossalai.legacy.engine.schedule import (InterleavedPipelineSchedule,
PipelineSchedule)
from colossalai.utils import MultiTimer
import os
@@ -26,7 +26,7 @@
```python
from colossalai.registry import GRADIENT_HANDLER
-from colossalai.engine.gradient_handler import BaseGradientHandler
+from colossalai.legacy.engine.gradient_handler import BaseGradientHandler


@GRADIENT_HANDLER.register_module
@@ -57,3 +57,4 @@ gradient_handler = [dict(type='MyGradientHandler')]
```shell
python -m torch.distributed.launch --nproc_per_node 4 --master_addr localhost --master_port 29500 train_with_engine.py
```
+<!-- doc-test-command: echo -->