[legacy] move trainer to legacy (#4545)

* [legacy] move trainer to legacy

* [doc] update docs related to trainer

* [test] ignore legacy test
This commit is contained in:
Hongxin Liu
2023-08-31 13:51:28 +08:00
parent 807e01a4ba
commit 89fe027787
32 changed files with 63 additions and 153 deletions

View File

@@ -43,7 +43,7 @@ from colossalai.engine.schedule import (InterleavedPipelineSchedule,
PipelineSchedule)
from colossalai.logging import disable_existing_loggers, get_dist_logger
from colossalai.nn.layer.wrapper import PipelineSharedModuleWrapper
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
from colossalai.utils.timer import MultiTimer
from model_zoo.gpt import GPTLMLoss
from torch.nn import functional as F
@@ -268,3 +268,4 @@ def train():
return_output_label=False,
)
```
+<!-- doc-test-command: echo -->

View File

@@ -38,7 +38,7 @@ from colossalai.builder import build_pipeline_model
from colossalai.engine.schedule import (InterleavedPipelineSchedule,
PipelineSchedule)
from colossalai.logging import disable_existing_loggers, get_dist_logger
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
from colossalai.utils import MultiTimer, get_dataloader
from timm.models import vision_transformer as vit
from torchvision import transforms
@@ -245,3 +245,4 @@ def train():
hooks=hook_list,
display_progress=True)
```
+<!-- doc-test-command: echo -->

View File

@@ -79,7 +79,7 @@ from colossalai.core import global_context as gpc
from colossalai.logging import disable_existing_loggers, get_dist_logger
from colossalai.nn.lr_scheduler import LinearWarmupLR
from colossalai.nn.metric import Accuracy
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
```
- Other modules
@@ -644,3 +644,4 @@ torchrun --standalone --nproc_per_node <NUM_GPUs> train_hybrid.py --config ./co
# If your torch >= 1.9.0
# python -m torch.distributed.run --standalone --nproc_per_node=<NUM_GPUs> train_hybrid.py --config ./configs/config_hybrid_parallel.py
```
+<!-- doc-test-command: echo -->

View File

@@ -64,7 +64,7 @@ Trainer is a more high-level wrapper for the user to execute training with fewer
```python
from colossalai.logging import get_dist_logger
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
# build components and initialize with colossalai.initialize
...
@@ -107,7 +107,7 @@ If you want to customize your own hook class, you can inherit `hooks.BaseHook` a
```python
from colossalai.logging import get_dist_logger
-from colossalai.trainer import hooks
+from colossalai.legacy.trainer import hooks
class LogMessageHook(hooks.BaseHook):
@@ -345,7 +345,7 @@ If you wish to train with a trainer object, you can follow the code snippet belo
```python
from colossalai.nn.metric import Accuracy
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
# create a trainer object
@@ -387,3 +387,4 @@ python -m torch.distributed.launch --nproc_per_node <num_gpus> --master_addr loc
# with trainer
python -m torch.distributed.launch --nproc_per_node <num_gpus> --master_addr localhost --master_port 29500 run_resnet_cifar10_with_trainer.py
```
+<!-- doc-test-command: echo -->

View File

@@ -41,7 +41,7 @@ for epoch in range(num_epochs):
#### Save when using trainer
```python
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
model = ...
engine, _, _, _ = colossalai.initialize(model=model, ...)
trainer = Trainer(engine, ...)
@@ -61,3 +61,4 @@ model = ...
load_checkpoint('xxx.pt', model)
... # train or test
```
+<!-- doc-test-command: echo -->

View File

@@ -267,7 +267,7 @@ from pathlib import Path
from colossalai.core import global_context as gpc
from colossalai.logging import get_dist_logger
from colossalai.utils import get_dataloader
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
from colossalai.nn.lr_scheduler import LinearWarmupLR
from timm.models import vit_base_patch16_224
from torchvision import datasets, transforms

View File

@@ -79,7 +79,7 @@ import colossalai.nn as col_nn
from colossalai.core import global_context as gpc
from colossalai.logging import disable_existing_loggers, get_dist_logger
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
from colossalai.utils import MultiTimer, get_dataloader
from colossalai.context import ParallelMode
from colossalai.pipeline.pipelinable import PipelinableContext
@@ -157,3 +157,4 @@ trainer.fit(train_dataloader=train_dataloader,
```
We use `2` pipeline stages and the batch will be split into `4` micro batches.
+<!-- doc-test-command: echo -->