[refactor] memory utils (#577)

2025-09-23 18:39:56 +00:00 · 2022-04-01 09:22:33 +08:00
parent 104cbbb313
commit e956d93ac2
15 changed files with 261 additions and 202 deletions
--- a/colossalai/engine/gradient_handler/_moe_gradient_handler.py
+++ b/colossalai/engine/gradient_handler/_moe_gradient_handler.py
@@ -29,6 +29,7 @@ class MoeGradientHandler(BaseGradientHandler):
        if global_data > 1:
            epsize_param_dict = get_moe_epsize_param_dict(self._model)

+
            # epsize is 1, indicating the params are replicated among processes in data parallelism
            # use the ParallelMode.DATA to get data parallel group
            # reduce gradients for all parameters in data parallelism
--- a/colossalai/engine/ophooks/zero_hook.py
+++ b/colossalai/engine/ophooks/zero_hook.py
@@ -10,8 +10,7 @@ from colossalai.zero.sharded_param.tensorful_state import TensorState

 from ._base_ophook import BaseOpHook

-from colossalai.utils.memory_utils.utils import \
-    colo_model_data_tensor_move_inline
+from colossalai.zero.shard_utils.tensor_utils import colo_model_data_tensor_move_inline


@OPHOOKS.register_module