[legacy] clean up legacy code (#4743)

* [legacy] remove outdated pipeline code (#4692)

* [legacy] remove cli of benchmark and update optim (#4690)

* [legacy] remove cli of benchmark and update optim

* [doc] fix cli doc test

* [legacy] fix engine clip grad norm

* [legacy] remove outdated colo tensor (#4694)

* [legacy] remove outdated colo tensor

* [test] fix test import

* [legacy] move outdated zero to legacy (#4696)

* [legacy] clean up utils (#4700)

* [legacy] clean up utils

* [example] update examples

* [legacy] clean up amp

* [legacy] fix amp module

* [legacy] clean up gpc (#4742)

* [legacy] clean up context

* [legacy] clean core, constants and global vars

* [legacy] refactor initialize

* [example] fix examples ci

* [example] fix examples ci

* [legacy] fix tests

* [example] fix gpt example

* [example] fix examples ci

* [devops] fix ci installation

* [example] fix examples ci
Author: Hongxin Liu
Date: 2023-09-18 16:31:06 +08:00
Committed by: GitHub
Parent: 32e7f99416
Commit: b5f9e37c70
342 changed files with 2919 additions and 4182 deletions


@@ -10,12 +10,12 @@ from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
 from torch.utils.data import DataLoader
 
+from colossalai.interface import OptimizerWrapper
 from colossalai.legacy.engine import BaseGradientHandler
-from colossalai.nn.optimizer import ColossalaiOptimizer
 from colossalai.utils import conditional_context
 
 
-class GradAccumOptimizer(ColossalaiOptimizer):
+class GradAccumOptimizer(OptimizerWrapper):
     """A wrapper for the optimizer to enable gradient accumulation by skipping the steps
     before accumulation size is reached.
@@ -74,7 +74,7 @@ class GradAccumOptimizer(ColossalaiOptimizer):
         if self.accumulate_step < self.accumulate_size:
            pass
         else:
-            self.optim.clip_grad_norm(model, max_norm)
+            self.optim.clip_grad_by_norm(max_norm)
 
     def backward(self, loss: Tensor) -> None:
         """Execute backward pass.