Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-09 13:00:52 +00:00
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
@@ -3,10 +3,21 @@ from .linear import LinearWarmupLR
 from .multistep import MultiStepLR, MultiStepWarmupLR
 from .onecycle import OneCycleLR
 from .poly import PolynomialLR, PolynomialWarmupLR
-from .torch import LambdaLR, MultiplicativeLR, StepLR, ExponentialLR
+from .torch import ExponentialLR, LambdaLR, MultiplicativeLR, StepLR

 __all__ = [
-    'CosineAnnealingLR', 'CosineAnnealingWarmupLR', 'FlatAnnealingLR', 'FlatAnnealingWarmupLR', 'LinearWarmupLR',
-    'MultiStepLR', 'MultiStepWarmupLR', 'OneCycleLR', 'PolynomialLR', 'PolynomialWarmupLR', 'LambdaLR',
-    'MultiplicativeLR', 'StepLR', 'ExponentialLR'
+    "CosineAnnealingLR",
+    "CosineAnnealingWarmupLR",
+    "FlatAnnealingLR",
+    "FlatAnnealingWarmupLR",
+    "LinearWarmupLR",
+    "MultiStepLR",
+    "MultiStepWarmupLR",
+    "OneCycleLR",
+    "PolynomialLR",
+    "PolynomialWarmupLR",
+    "LambdaLR",
+    "MultiplicativeLR",
+    "StepLR",
+    "ExponentialLR",
 ]
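This hunk only re-sorts one import and expands `__all__` onto one line per name; the exported schedulers are unchanged. A minimal usage sketch, assuming the package path `colossalai.nn.lr_scheduler` (inferred from the repository layout, not shown in this diff):

# Hedged sketch: the import path is an assumption, not part of the diff.
import torch
from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# Warm up for 10 steps, then cosine-anneal over the remaining 90.
scheduler = CosineAnnealingWarmupLR(optimizer, total_steps=100, warmup_steps=10)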
@@ -58,11 +58,10 @@ class CosineAnnealingWarmupLR(WarmupScheduler):
         the schedule is started from the beginning or When last_epoch=-1, sets initial lr as lr.
     """

-    def __init__(self, optimizer, total_steps: int, warmup_steps: int = 0, eta_min: float = 0., last_epoch: int = -1):
-        base_scheduler = _CosineAnnealingLR(optimizer,
-                                            total_steps - warmup_steps,
-                                            eta_min=eta_min,
-                                            last_epoch=last_epoch)
+    def __init__(self, optimizer, total_steps: int, warmup_steps: int = 0, eta_min: float = 0.0, last_epoch: int = -1):
+        base_scheduler = _CosineAnnealingLR(
+            optimizer, total_steps - warmup_steps, eta_min=eta_min, last_epoch=last_epoch
+        )
         super().__init__(optimizer, warmup_steps, base_scheduler)


@@ -79,7 +78,7 @@ class FlatAnnealingLR(DelayerScheduler):

     def __init__(self, optimizer, total_steps: int, pct_start: float = 0.72, last_epoch: int = -1, **kwargs):
         if not (0.0 <= pct_start <= 1.0):
-            raise ValueError(f'pct_start must >= 0.0 and <= 1.0, got {pct_start}')
+            raise ValueError(f"pct_start must >= 0.0 and <= 1.0, got {pct_start}")
         flat_steps = int(total_steps * pct_start)
         anneal_steps = total_steps - flat_steps
         base_scheduler = _CosineAnnealingLR(optimizer, anneal_steps)
@@ -100,16 +99,18 @@ class FlatAnnealingWarmupLR(WarmupDelayerScheduler):
         the schedule is started from the beginning or When last_epoch=-1, sets initial lr as lr.
     """

-    def __init__(self,
-                 optimizer,
-                 total_steps: int,
-                 warmup_steps: int = 0,
-                 pct_start: float = 0.72,
-                 eta_min: int = 0,
-                 last_epoch: int = -1,
-                 **kwargs):
+    def __init__(
+        self,
+        optimizer,
+        total_steps: int,
+        warmup_steps: int = 0,
+        pct_start: float = 0.72,
+        eta_min: int = 0,
+        last_epoch: int = -1,
+        **kwargs,
+    ):
         if not (0.0 <= pct_start <= 1.0):
-            raise ValueError(f'pct_start must >= 0.0 and <= 1.0, got {pct_start}')
+            raise ValueError(f"pct_start must >= 0.0 and <= 1.0, got {pct_start}")
         flat_steps = int((total_steps - warmup_steps) * pct_start)
         anneal_steps = total_steps - warmup_steps - flat_steps
         base_scheduler = _CosineAnnealingLR(optimizer, anneal_steps, eta_min=eta_min)
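For the flat-annealing schedulers above, the step budget after warmup is split by `pct_start` into a flat phase at the base lr and a cosine-annealing tail. A minimal worked sketch of that arithmetic, with illustrative numbers not taken from the source:

# Illustrative values only.
total_steps, warmup_steps, pct_start = 100, 10, 0.72

flat_steps = int((total_steps - warmup_steps) * pct_start)    # int(90 * 0.72) = 64 steps held at the base lr
anneal_steps = total_steps - warmup_steps - flat_steps        # 100 - 10 - 64 = 26 cosine-annealing steps
print(warmup_steps, flat_steps, anneal_steps)                 # 10 64 26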
@@ -2,7 +2,6 @@ from torch.optim.lr_scheduler import _LRScheduler


 class _enable_get_lr_call:
-
     def __init__(self, o):
         self.o = o

@@ -28,18 +27,18 @@ class DelayerScheduler(_LRScheduler):

     def __init__(self, optimizer, delay_epochs, after_scheduler, last_epoch=-1):
         if delay_epochs < 0:
-            raise ValueError(f'delay_epochs must >= 0, got {delay_epochs}')
+            raise ValueError(f"delay_epochs must >= 0, got {delay_epochs}")
         self.delay_epochs = delay_epochs
         self.after_scheduler = after_scheduler
         self.finished = False
         super().__init__(optimizer, last_epoch)

     def state_dict(self):
-        state_dict = {key: value for key, value in self.__dict__.items() if key not in 'optimizer'}
-        if isinstance(state_dict['after_scheduler'], _LRScheduler):
-            state_dict['after_scheduler_type'] = type(state_dict['after_scheduler']).__name__
-            state_dict['after_scheduler_dict'] = state_dict['after_scheduler'].state_dict()
-            del state_dict['after_scheduler']
+        state_dict = {key: value for key, value in self.__dict__.items() if key not in "optimizer"}
+        if isinstance(state_dict["after_scheduler"], _LRScheduler):
+            state_dict["after_scheduler_type"] = type(state_dict["after_scheduler"]).__name__
+            state_dict["after_scheduler_dict"] = state_dict["after_scheduler"].state_dict()
+            del state_dict["after_scheduler"]
         else:
             raise NotImplementedError()
         return state_dict
@@ -85,11 +84,11 @@ class WarmupScheduler(_LRScheduler):
         super().__init__(optimizer, last_epoch)

     def state_dict(self):
-        state_dict = {key: value for key, value in self.__dict__.items() if key not in 'optimizer'}
-        if isinstance(state_dict['after_scheduler'], _LRScheduler):
-            state_dict['after_scheduler_type'] = type(state_dict['after_scheduler']).__name__
-            state_dict['after_scheduler_dict'] = state_dict['after_scheduler'].state_dict()
-            del state_dict['after_scheduler']
+        state_dict = {key: value for key, value in self.__dict__.items() if key not in "optimizer"}
+        if isinstance(state_dict["after_scheduler"], _LRScheduler):
+            state_dict["after_scheduler_type"] = type(state_dict["after_scheduler"]).__name__
+            state_dict["after_scheduler_dict"] = state_dict["after_scheduler"].state_dict()
+            del state_dict["after_scheduler"]
         else:
             raise NotImplementedError()
         return state_dict
@@ -130,9 +129,9 @@ class WarmupDelayerScheduler(_LRScheduler):

     def __init__(self, optimizer, warmup_epochs, delay_epochs, after_scheduler, last_epoch=-1):
         if delay_epochs < 0:
-            raise ValueError(f'delay_epochs must >= 0, got {delay_epochs}')
+            raise ValueError(f"delay_epochs must >= 0, got {delay_epochs}")
         if warmup_epochs < 0:
-            raise ValueError(f'warmup_epochs must >= 0, got {warmup_epochs}')
+            raise ValueError(f"warmup_epochs must >= 0, got {warmup_epochs}")
         self.warmup_epochs = warmup_epochs
         self.delay_epochs = delay_epochs
         self.after_scheduler = after_scheduler
@@ -140,11 +139,11 @@ class WarmupDelayerScheduler(_LRScheduler):
         super().__init__(optimizer, last_epoch)

     def state_dict(self):
-        state_dict = {key: value for key, value in self.__dict__.items() if key not in 'optimizer'}
-        if isinstance(state_dict['after_scheduler'], _LRScheduler):
-            state_dict['after_scheduler_type'] = type(state_dict['after_scheduler']).__name__
-            state_dict['after_scheduler_dict'] = state_dict['after_scheduler'].state_dict()
-            del state_dict['after_scheduler']
+        state_dict = {key: value for key, value in self.__dict__.items() if key not in "optimizer"}
+        if isinstance(state_dict["after_scheduler"], _LRScheduler):
+            state_dict["after_scheduler_type"] = type(state_dict["after_scheduler"]).__name__
+            state_dict["after_scheduler_dict"] = state_dict["after_scheduler"].state_dict()
+            del state_dict["after_scheduler"]
         else:
             raise NotImplementedError()
         return state_dict
@@ -155,7 +154,7 @@ class WarmupDelayerScheduler(_LRScheduler):
                 self.after_scheduler.base_lrs = self.base_lrs
                 # reset lr to base_lr
                 for group, base_lr in zip(self.optimizer.param_groups, self.base_lrs):
-                    group['lr'] = base_lr
+                    group["lr"] = base_lr
                 self.finished = True
             with _enable_get_lr_call(self.after_scheduler):
                 return self.after_scheduler.get_lr()
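The `state_dict` methods above all follow the same pattern: the wrapped `after_scheduler` cannot be stored directly, so it is flattened into its class name plus its own state dict. A minimal sketch of how a checkpoint produced this way might be restored; `rebuild_after_scheduler` is a hypothetical helper (the diff shows only the save side):

# Hedged sketch of the restore side; this helper is not part of the diff above.
def rebuild_after_scheduler(wrapper, saved_state):
    # Pull out the flattened nested-scheduler entries first.
    after_type = saved_state.pop("after_scheduler_type")
    after_dict = saved_state.pop("after_scheduler_dict")
    # The wrapper's own attributes (delay_epochs, finished, ...) load as plain dict entries.
    wrapper.__dict__.update(saved_state)
    # The nested scheduler was dropped from the dict, so its state is re-applied separately.
    assert type(wrapper.after_scheduler).__name__ == after_type
    wrapper.after_scheduler.load_state_dict(after_dict)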
@@ -21,5 +21,7 @@ class LinearWarmupLR(_LRScheduler):
         if self.last_epoch < self.warmup_steps:
             return [(self.last_epoch + 1) / (self.warmup_steps + 1) * lr for lr in self.base_lrs]
         else:
-            return [(self.total_steps - self.last_epoch) / (self.total_steps - self.warmup_steps) * lr
-                    for lr in self.base_lrs]
+            return [
+                (self.total_steps - self.last_epoch) / (self.total_steps - self.warmup_steps) * lr
+                for lr in self.base_lrs
+            ]
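The reflowed return keeps the same schedule: the lr ramps up linearly over the warmup steps, then decays linearly to zero at `total_steps`. A small worked sketch with illustrative numbers, not taken from the source:

# Illustrative numbers only.
base_lr, total_steps, warmup_steps = 0.1, 100, 10

def linear_warmup_lr(last_epoch):
    if last_epoch < warmup_steps:
        return (last_epoch + 1) / (warmup_steps + 1) * base_lr                      # warmup ramp
    return (total_steps - last_epoch) / (total_steps - warmup_steps) * base_lr      # linear decay

print(linear_warmup_lr(0))    # ~0.0091
print(linear_warmup_lr(10))   # 0.1 (end of warmup)
print(linear_warmup_lr(100))  # 0.0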
@@ -20,13 +20,15 @@ class MultiStepLR(_MultiStepLR):
         the schedule is started from the beginning or When last_epoch=-1, sets initial lr as lr.
     """

-    def __init__(self,
-                 optimizer,
-                 total_steps: int,
-                 milestones: List[int] = None,
-                 gamma: float = 0.1,
-                 last_epoch: int = -1,
-                 **kwargs):
+    def __init__(
+        self,
+        optimizer,
+        total_steps: int,
+        milestones: List[int] = None,
+        gamma: float = 0.1,
+        last_epoch: int = -1,
+        **kwargs,
+    ):
         super().__init__(optimizer, milestones, gamma=gamma, last_epoch=last_epoch)


@@ -44,16 +46,18 @@ class MultiStepWarmupLR(WarmupScheduler):
         the schedule is started from the beginning or When last_epoch=-1, sets initial lr as lr.
     """

-    def __init__(self,
-                 optimizer,
-                 total_steps: int,
-                 warmup_steps: int = 0,
-                 milestones: List[int] = None,
-                 gamma: float = 0.1,
-                 last_epoch: int = -1,
-                 **kwargs):
+    def __init__(
+        self,
+        optimizer,
+        total_steps: int,
+        warmup_steps: int = 0,
+        milestones: List[int] = None,
+        gamma: float = 0.1,
+        last_epoch: int = -1,
+        **kwargs,
+    ):
         if len(milestones) == 0:
-            raise ValueError('milestones cannot be empty')
+            raise ValueError("milestones cannot be empty")
         milestones = [v - warmup_steps for v in milestones if v >= warmup_steps]
         base_scheduler = _MultiStepLR(optimizer, milestones=milestones, gamma=gamma)
         super().__init__(optimizer, warmup_steps, base_scheduler, last_epoch=last_epoch)
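In `MultiStepWarmupLR` the milestones are given in absolute steps, so they are shifted into the inner scheduler's frame by subtracting `warmup_steps`, and any milestone that falls inside the warmup window is dropped. A quick illustrative check (values are hypothetical):

# Illustrative values only.
warmup_steps = 5
milestones = [3, 10, 20]

shifted = [v - warmup_steps for v in milestones if v >= warmup_steps]
print(shifted)  # [5, 15] -- the milestone at step 3 falls inside warmup and is discarded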
@@ -65,27 +65,31 @@ class OneCycleLR(_OneCycleLR):
     https://arxiv.org/abs/1708.07120
     """

-    def __init__(self,
-                 optimizer,
-                 total_steps: int,
-                 pct_start=0.3,
-                 anneal_strategy='cos',
-                 cycle_momentum=True,
-                 base_momentum=0.85,
-                 max_momentum=0.95,
-                 div_factor=25.0,
-                 final_div_factor=10000.0,
-                 last_epoch=-1,
-                 **kwargs):
-        max_lrs = list(map(lambda group: group['lr'], optimizer.param_groups))
-        super().__init__(optimizer,
-                         max_lrs,
-                         total_steps=total_steps,
-                         pct_start=pct_start,
-                         anneal_strategy=anneal_strategy,
-                         cycle_momentum=cycle_momentum,
-                         base_momentum=base_momentum,
-                         max_momentum=max_momentum,
-                         div_factor=div_factor,
-                         final_div_factor=final_div_factor,
-                         last_epoch=last_epoch)
+    def __init__(
+        self,
+        optimizer,
+        total_steps: int,
+        pct_start=0.3,
+        anneal_strategy="cos",
+        cycle_momentum=True,
+        base_momentum=0.85,
+        max_momentum=0.95,
+        div_factor=25.0,
+        final_div_factor=10000.0,
+        last_epoch=-1,
+        **kwargs,
+    ):
+        max_lrs = list(map(lambda group: group["lr"], optimizer.param_groups))
+        super().__init__(
+            optimizer,
+            max_lrs,
+            total_steps=total_steps,
+            pct_start=pct_start,
+            anneal_strategy=anneal_strategy,
+            cycle_momentum=cycle_momentum,
+            base_momentum=base_momentum,
+            max_momentum=max_momentum,
+            div_factor=div_factor,
+            final_div_factor=final_div_factor,
+            last_epoch=last_epoch,
+        )
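Unlike torch's `OneCycleLR`, this wrapper takes no explicit `max_lr`: it reads the peak lr for each parameter group from the optimizer's current `lr`. A hedged usage sketch, assuming the wrapper class defined above is in scope:

# Sketch only: OneCycleLR here is the wrapper class from the hunk above, not torch's class.
import torch

model = torch.nn.Linear(8, 2)
# The lr set here (0.5) becomes the peak (max_lr) of the one-cycle policy.
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
scheduler = OneCycleLR(optimizer, total_steps=1000, pct_start=0.3)

for _ in range(1000):
    loss = model(torch.randn(4, 8)).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()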
@@ -15,15 +15,11 @@ class PolynomialLR(_LRScheduler):
         the schedule is started from the beginning or When last_epoch=-1, sets initial lr as lr.
     """

-    def __init__(self,
-                 optimizer,
-                 total_steps: int,
-                 end_lr: float = 0.0001,
-                 power: float = 1.0,
-                 last_epoch: int = -1,
-                 **kwargs):
+    def __init__(
+        self, optimizer, total_steps: int, end_lr: float = 0.0001, power: float = 1.0, last_epoch: int = -1, **kwargs
+    ):
         if end_lr < 0:
-            raise ValueError(f'end_lr must >= 0, got {end_lr}')
+            raise ValueError(f"end_lr must >= 0, got {end_lr}")
         self.total_steps = total_steps
         self.end_lr = end_lr
         self.power = power
@@ -33,9 +29,11 @@ class PolynomialLR(_LRScheduler):
         return self._get_closed_form_lr()

     def _get_closed_form_lr(self):
-        return [(base_lr - self.end_lr) *
-                ((1 - min(self.last_epoch, self.total_steps) / self.total_steps)**self.power) + self.end_lr
-                for base_lr in self.base_lrs]
+        return [
+            (base_lr - self.end_lr) * ((1 - min(self.last_epoch, self.total_steps) / self.total_steps) ** self.power)
+            + self.end_lr
+            for base_lr in self.base_lrs
+        ]


 class PolynomialWarmupLR(WarmupScheduler):
@@ -51,13 +49,15 @@ class PolynomialWarmupLR(WarmupScheduler):
         the schedule is started from the beginning or When last_epoch=-1, sets initial lr as lr.
     """

-    def __init__(self,
-                 optimizer,
-                 total_steps: int,
-                 warmup_steps: int = 0,
-                 end_lr: float = 0.0001,
-                 power: float = 1.0,
-                 last_epoch: int = -1,
-                 **kwargs):
+    def __init__(
+        self,
+        optimizer,
+        total_steps: int,
+        warmup_steps: int = 0,
+        end_lr: float = 0.0001,
+        power: float = 1.0,
+        last_epoch: int = -1,
+        **kwargs,
+    ):
         base_scheduler = PolynomialLR(optimizer, total_steps - warmup_steps, end_lr=end_lr, power=power)
         super().__init__(optimizer, warmup_steps, base_scheduler, last_epoch=last_epoch)
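The reflowed closed form above is the usual polynomial decay, lr(t) = (base_lr - end_lr) * (1 - min(t, T) / T) ** power + end_lr. A small numeric check with illustrative values, not taken from the source:

# Illustrative values only.
base_lr, end_lr, power, total_steps = 0.1, 0.0001, 1.0, 100

def poly_lr(last_epoch):
    return (base_lr - end_lr) * ((1 - min(last_epoch, total_steps) / total_steps) ** power) + end_lr

print(poly_lr(0))    # 0.1 at the start
print(poly_lr(50))   # ~0.05 halfway (power=1.0 reduces to plain linear decay)
print(poly_lr(100))  # 0.0001 == end_lr at the end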