[legacy] clean up legacy code (#4743)

* [legacy] remove outdated codes of pipeline (#4692)
* [legacy] remove cli of benchmark and update optim (#4690)
* [legacy] remove cli of benchmark and update optim
* [doc] fix cli doc test
* [legacy] fix engine clip grad norm
* [legacy] remove outdated colo tensor (#4694)
* [legacy] remove outdated colo tensor
* [test] fix test import
* [legacy] move outdated zero to legacy (#4696)
* [legacy] clean up utils (#4700)
* [legacy] clean up utils
* [example] update examples
* [legacy] clean up amp
* [legacy] fix amp module
* [legacy] clean up gpc (#4742)
* [legacy] clean up context
* [legacy] clean core, constants and global vars
* [legacy] refactor initialize
* [example] fix examples ci
* [example] fix examples ci
* [legacy] fix tests
* [example] fix gpt example
* [example] fix examples ci
* [devops] fix ci installation
* [example] fix examples ci
2025-09-16 06:30:41 +00:00 · 2023-09-18 16:31:06 +08:00
parent 32e7f99416
commit b5f9e37c70
342 changed files with 2919 additions and 4182 deletions
--- a/colossalai/nn/optimizer/__init__.py
+++ b/colossalai/nn/optimizer/__init__.py
@@ -1,10 +1,9 @@
-from .colossalai_optimizer import ColossalaiOptimizer
+from .cpu_adam import CPUAdam
 from .fused_adam import FusedAdam
 from .fused_lamb import FusedLAMB
 from .fused_sgd import FusedSGD
+from .hybrid_adam import HybridAdam
 from .lamb import Lamb
 from .lars import Lars
-from .cpu_adam import CPUAdam
-from .hybrid_adam import HybridAdam

-__all__ = ['ColossalaiOptimizer', 'FusedLAMB', 'FusedAdam', 'FusedSGD', 'Lamb', 'Lars', 'CPUAdam', 'HybridAdam']
+__all__ = ['FusedLAMB', 'FusedAdam', 'FusedSGD', 'Lamb', 'Lars', 'CPUAdam', 'HybridAdam']
--- a/colossalai/nn/optimizer/colossalai_optimizer.py
+++ b/colossalai/nn/optimizer/colossalai_optimizer.py
@@ -1,44 +0,0 @@
-import torch
-import torch.nn as nn
-from torch import Tensor
-from torch.optim import Optimizer
-from colossalai.utils import clip_grad_norm_fp32
-
-
-class ColossalaiOptimizer(Optimizer):
-
-    def __init__(self, optim: Optimizer):
-        self.optim = optim
-
-    @property
-    def param_groups(self):
-        return self.optim.param_groups
-
-    @property
-    def defaults(self):
-        return self.optim.defaults
-
-    def add_param_group(self, *args, **kwargs):
-        return self.optim.add_param_group(*args, **kwargs)
-
-    def step(self, *args, **kwargs):
-        return self.optim.step(*args, **kwargs)
-
-    def zero_grad(self, *args, **kwargs):
-        self.optim.zero_grad(*args, **kwargs)
-
-    def load_state_dict(self, *args, **kwargs):
-        self.optim.load_state_dict(*args, **kwargs)
-
-    def state_dict(self):
-        return self.optim.state_dict()
-
-    def backward(self, loss: Tensor):
-        loss.backward()
-
-    def backward_by_grad(self, tensor: Tensor, grad: Tensor):
-        torch.autograd.backward(tensors=tensor, grad_tensors=grad)
-
-    def clip_grad_norm(self, model: nn.Module, max_norm: float):
-        if max_norm > 0.0:
-            clip_grad_norm_fp32(model.parameters(), max_norm)