Mirror of https://github.com/hpcaitech/ColossalAI.git
[legacy] move trainer to legacy (#4545)

* [legacy] move trainer to legacy
* [doc] update docs related to trainer
* [test] ignore legacy test
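For code outside this repository, the user-visible effect of the commit is an import-path change; the diff hunks below update the documentation snippets accordingly. A minimal sketch of the migration (assuming the installed version ships the `colossalai.legacy` package):

```python
# Before this commit, the trainer was imported from the top-level package:
# from colossalai.trainer import Trainer, hooks

# After this commit, the same classes live under the legacy namespace:
from colossalai.legacy.trainer import Trainer, hooks
```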
@@ -43,7 +43,7 @@ from colossalai.engine.schedule import (InterleavedPipelineSchedule,
                                         PipelineSchedule)
 from colossalai.logging import disable_existing_loggers, get_dist_logger
 from colossalai.nn.layer.wrapper import PipelineSharedModuleWrapper
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
 from colossalai.utils.timer import MultiTimer
 from model_zoo.gpt import GPTLMLoss
 from torch.nn import functional as F
@@ -268,3 +268,4 @@ def train():
         return_output_label=False,
     )
 ```
+<!-- doc-test-command: echo -->
@@ -38,7 +38,7 @@ from colossalai.builder import build_pipeline_model
 from colossalai.engine.schedule import (InterleavedPipelineSchedule,
                                         PipelineSchedule)
 from colossalai.logging import disable_existing_loggers, get_dist_logger
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
 from colossalai.utils import MultiTimer, get_dataloader
 from timm.models import vision_transformer as vit
 from torchvision import transforms
@@ -245,3 +245,4 @@ def train():
             hooks=hook_list,
             display_progress=True)
 ```
+<!-- doc-test-command: echo -->
@@ -79,7 +79,7 @@ from colossalai.core import global_context as gpc
 from colossalai.logging import disable_existing_loggers, get_dist_logger
 from colossalai.nn.lr_scheduler import LinearWarmupLR
 from colossalai.nn.metric import Accuracy
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
 ```

 - Other modules
@@ -644,3 +644,4 @@ torchrun --standalone --nproc_per_node <NUM_GPUs> train_hybrid.py --config ./co
 # If your torch >= 1.9.0
 # python -m torch.distributed.run --standalone --nproc_per_node= <NUM_GPUs> train_hybrid.py --config ./configs/config_hybrid_parallel.py
 ```
+<!-- doc-test-command: echo -->
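The `./configs/config_hybrid_parallel.py` referenced by the launch command above is not part of this diff; in Colossal-AI's config-file convention a hybrid (pipeline + tensor) setup is typically declared roughly as follows. This is a sketch with placeholder values, not the file from this repository:

```python
# config_hybrid_parallel.py (sketch): pipeline parallelism combined with 1D tensor parallelism
NUM_MICRO_BATCHES = 4                 # placeholder: micro-batches per pipeline step
parallel = dict(
    pipeline=2,                       # placeholder: number of pipeline stages
    tensor=dict(size=2, mode='1d'),   # placeholder: 2-way 1D tensor parallelism
)
```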
@@ -64,7 +64,7 @@ Trainer is a more high-level wrapper for the user to execute training with fewer

 ```python
 from colossalai.logging import get_dist_logger
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks

 # build components and initialize with colossalai.initialize
 ...
@@ -107,7 +107,7 @@ If you want to customize your own hook class, you can inherit `hooks.BaseHook` a

 ```python
 from colossalai.logging import get_dist_logger
-from colossalai.trainer import hooks
+from colossalai.legacy.trainer import hooks

 class LogMessageHook(hooks.BaseHook):

@@ -345,7 +345,7 @@ If you wish to train with a trainer object, you can follow the code snippet belo

 ```python
 from colossalai.nn.metric import Accuracy
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks


 # create a trainer object
@@ -387,3 +387,4 @@ python -m torch.distributed.launch --nproc_per_node <num_gpus> --master_addr loc
 # with trainer
 python -m torch.distributed.launch --nproc_per_node <num_gpus> --master_addr localhost --master_port 29500 run_resnet_cifar10_with_trainer.py
 ```
+<!-- doc-test-command: echo -->
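The doc text touched by these hunks covers subclassing `hooks.BaseHook` and driving training through a `Trainer`. Below is a minimal sketch of that pattern under the new import path; `engine`, `logger`, `train_dataloader`, and `NUM_EPOCHS` are assumed to come from an earlier `colossalai.initialize` setup, and the hook methods follow the trainer's `before_train`/`after_train` convention:

```python
from colossalai.logging import get_dist_logger
from colossalai.legacy.trainer import Trainer, hooks


class LogMessageHook(hooks.BaseHook):
    """Custom hook that logs a message when training starts and finishes."""

    def __init__(self, priority=10):
        super().__init__(priority)
        self._logger = get_dist_logger()

    def before_train(self, trainer):
        self._logger.info('training starts')

    def after_train(self, trainer):
        self._logger.info('training finished')


# create a trainer object around the engine returned by colossalai.initialize
trainer = Trainer(engine=engine, logger=logger)

# built-in hooks plus the custom one defined above
hook_list = [
    hooks.LossHook(),
    hooks.LogMetricByEpochHook(logger),
    LogMessageHook(),
]

trainer.fit(
    train_dataloader=train_dataloader,  # assumed to be built earlier
    epochs=NUM_EPOCHS,                  # assumed constant from the config
    hooks=hook_list,
    display_progress=True,
)
```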
@@ -41,7 +41,7 @@ for epoch in range(num_epochs):

 #### Save when using trainer
 ```python
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
 model = ...
 engine, _, _, _ = colossalai.initialize(model=model, ...)
 trainer = Trainer(engine, ...)
@@ -61,3 +61,4 @@ model = ...
 load_checkpoint('xxx.pt', model)
 ... # train or test
 ```
+<!-- doc-test-command: echo -->
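The checkpoint doc touched here pairs the legacy trainer with a save hook and a plain `load_checkpoint` call. A minimal sketch of that flow; the `SaveCheckpointHook` argument names and the `./ckpt.pt` path are assumptions, and `engine`, `train_dataloader`, and `NUM_EPOCHS` are again assumed to come from `colossalai.initialize`:

```python
from colossalai.legacy.trainer import Trainer, hooks
from colossalai.utils import load_checkpoint

trainer = Trainer(engine)

# save a checkpoint every epoch via a hook (argument names are assumptions)
hook_list = [hooks.SaveCheckpointHook(interval=1, checkpoint_dir='./ckpt.pt')]
trainer.fit(train_dataloader=train_dataloader,
            epochs=NUM_EPOCHS,
            hooks=hook_list)

# later: rebuild the same architecture and restore the saved weights
model = ...
load_checkpoint('./ckpt.pt', model)
```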
@@ -267,7 +267,7 @@ from pathlib import Path
 from colossalai.core import global_context as gpc
 from colossalai.logging import get_dist_logger
 from colossalai.utils import get_dataloader
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
 from colossalai.nn.lr_scheduler import LinearWarmupLR
 from timm.models import vit_base_patch16_224
 from torchvision import datasets, transforms
@@ -79,7 +79,7 @@ import colossalai.nn as col_nn

 from colossalai.core import global_context as gpc
 from colossalai.logging import disable_existing_loggers, get_dist_logger
-from colossalai.trainer import Trainer, hooks
+from colossalai.legacy.trainer import Trainer, hooks
 from colossalai.utils import MultiTimer, get_dataloader
 from colossalai.context import ParallelMode
 from colossalai.pipeline.pipelinable import PipelinableContext
@@ -157,3 +157,4 @@ trainer.fit(train_dataloader=train_dataloader,
 ```

 We use `2` pipeline stages and the batch will be split into `4` micro batches.
+<!-- doc-test-command: echo -->
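For reference, the "2 pipeline stages / 4 micro batches" mentioned in the closing doc line would, in Colossal-AI's config convention, typically be declared along these lines (a sketch; the values mirror that sentence, not a file from this commit):

```python
# config sketch: split the model into 2 pipeline stages and each batch into 4 micro-batches
NUM_MICRO_BATCHES = 4
parallel = dict(pipeline=2)
```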