[bug] fix early return (#5740)

* [bug] fix early return after chunk gather that skipped `__update_tensors_ptr()`

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [chore] add test for prefetch

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
botbw
2024-05-21 14:21:58 +08:00
committed by GitHub
parent 83716e9feb
commit 13c06d36a3
9 changed files with 50 additions and 19 deletions

View File

@@ -361,10 +361,11 @@ class Chunk:
"""Make the chunk usable for the parameters inside it. It's an operation done in CUDA."""
# sanity check
assert self.chunk_temp is None
maybe_work = None
if not self.is_gathered:
return self.__gather(async_op=async_access)
maybe_work = self.__gather(async_op=async_access)
self.__update_tensors_ptr()
return None
return maybe_work
def release_chunk(self):
"""Release the usable chunk. It's an operation done in CUDA."""

View File

@@ -5,7 +5,6 @@ from typing import List
import torch
from colossalai.logging import DistributedLogger
from colossalai.tensor.param_op_hook import ColoParamOpHook
from colossalai.utils import is_ddp_ignored
from colossalai.zero.gemini import TensorState
@@ -17,9 +16,6 @@ class TrainingPhase(Enum):
BACKWARD = 1
logger = DistributedLogger("gemini_hook")
class GeminiZeROHook(ColoParamOpHook):
def __init__(self, gemini_manager: GeminiManager) -> None:
super().__init__()

View File

@@ -177,6 +177,10 @@ class GeminiManager:
return self._mem_stats_collector.cuda_margin_mem
return None
@property
def placement_policy(self) -> PlacementPolicy:
return self._placement_policy
@property
def compute_list(self) -> List[Tuple[Chunk, ...]]:
return self._compute_list
@@ -189,10 +193,6 @@ class GeminiManager:
def async_works(self) -> Dict[Chunk, dist.Work]:
return self._async_works
@property
def placement_policy(self) -> PlacementPolicy:
return self._placement_policy
@property
def is_cuda_margin_mem_avail(self) -> bool:
return self._placement_policy.need_mem_stats