[legacy] move communication and nn to legacy and refactor logger (#4671)

* [legacy] move communication to legacy (#4640)

* [legacy] refactor logger and clean up legacy codes (#4654)

* [legacy] make logger independent to gpc

* [legacy] make optim independent to registry

* [legacy] move test engine to legacy

* [legacy] move nn to legacy (#4656)

* [legacy] move nn to legacy

* [checkpointio] fix save hf config

* [test] remove useledd rpc pp test

* [legacy] fix nn init

* [example] skip tutorial hybriad parallel example

* [devops] test doc check

* [devops] test doc check
This commit is contained in:
Hongxin Liu
2023-09-11 16:24:28 +08:00
committed by GitHub
parent 536397cc95
commit 554aa9592e
170 changed files with 781 additions and 758 deletions

View File

@@ -3,10 +3,7 @@ from torch.optim.lr_scheduler import LambdaLR as _LambdaLR
from torch.optim.lr_scheduler import MultiplicativeLR as _MultiplicativeLR
from torch.optim.lr_scheduler import StepLR as _StepLR
from colossalai.legacy.registry import LR_SCHEDULERS
@LR_SCHEDULERS.register_module
class LambdaLR(_LambdaLR):
"""Sets the learning rate of each parameter group to the initial lr
times a given function. When last_epoch=-1, sets initial lr as lr.
@@ -24,7 +21,6 @@ class LambdaLR(_LambdaLR):
super().__init__(optimizer, lr_lambda, last_epoch=last_epoch)
@LR_SCHEDULERS.register_module
class MultiplicativeLR(_MultiplicativeLR):
"""Multiply the learning rate of each parameter group by the factor given
in the specified function. When last_epoch=-1, sets initial lr as lr.
@@ -42,7 +38,6 @@ class MultiplicativeLR(_MultiplicativeLR):
super().__init__(optimizer, lr_lambda, last_epoch=last_epoch)
@LR_SCHEDULERS.register_module
class StepLR(_StepLR):
"""Decays the learning rate of each parameter group by gamma every
step_size epochs. Notice that such decay can happen simultaneously with
@@ -61,7 +56,6 @@ class StepLR(_StepLR):
super().__init__(optimizer, step_size, gamma=gamma, last_epoch=last_epoch)
@LR_SCHEDULERS.register_module
class ExponentialLR(_ExponentialLR):
"""Decays the learning rate of each parameter group by gamma every epoch.
When last_epoch=-1, sets initial lr as lr