[zero] add constant placement policy (#1705)

* fixes a memory leak in ZeroDDP initialization when parameters are in fp16.
* bans releasing chunks on CUDA; a chunk may be released only when it is about to be offloaded.
* adds a constant placement policy, which lets users reserve a fixed caching memory space on CUDA for parameters (see the conceptual sketch below).
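
To make the last two points concrete, here is a rough conceptual sketch of a constant placement policy. The names below (ConstPlacementPolicy, Chunk, place) are hypothetical and are not the ColossalAI API added by this commit; the sketch only illustrates the idea of a fixed CUDA budget where a chunk's CUDA memory is released only at the moment it is offloaded.

# Illustrative sketch only -- ConstPlacementPolicy, Chunk and place() are
# hypothetical names, not the actual ColossalAI implementation.
from typing import List


class Chunk:
    """A contiguous block of parameters with a fixed size in bytes."""

    def __init__(self, size_bytes: int) -> None:
        self.size_bytes = size_bytes
        self.on_cuda = False


class ConstPlacementPolicy:
    """Keep at most `budget_bytes` of parameter chunks resident on CUDA.

    A chunk's CUDA memory is released only when that chunk is selected for
    offload to CPU, never eagerly.
    """

    def __init__(self, budget_bytes: int) -> None:
        self.budget_bytes = budget_bytes
        self.resident: List[Chunk] = []
        self.used_bytes = 0

    def place(self, chunk: Chunk) -> None:
        """Move `chunk` onto CUDA, offloading older chunks if the budget is full."""
        while self.used_bytes + chunk.size_bytes > self.budget_bytes and self.resident:
            victim = self.resident.pop(0)
            victim.on_cuda = False                 # offload victim to CPU ...
            self.used_bytes -= victim.size_bytes   # ... and only then release its CUDA space
        self.resident.append(chunk)
        chunk.on_cuda = True
        self.used_bytes += chunk.size_bytes
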
Author: HELSON
Date: 2022-10-14 17:53:16 +08:00
Committed by: GitHub
Parent: 5f41463a76
Commit: 1468e4bcfc
11 changed files with 117 additions and 57 deletions


@@ -33,7 +33,10 @@ def ColoModulize(module):
 class ColoInitContext(InsertPostInitMethodToModuleSubClasses):
-    def __init__(self, lazy_memory_allocate: bool = False, device: torch.device = torch.device('cpu')):
+    def __init__(self,
+                 lazy_memory_allocate: bool = False,
+                 device: torch.device = torch.device('cpu'),
+                 dtype: torch.dtype = torch.float):
         """
         Args:
             lazy_memory_allocate (bool, optional): whether to allocate memory for the parameter tensors. Defaults to False.
@@ -42,6 +45,7 @@ class ColoInitContext(InsertPostInitMethodToModuleSubClasses):
         super().__init__()
         self._lazy_memory_allocate = lazy_memory_allocate
         self._device = device
+        self._dtype = dtype
         self._register_colo_modules()
@@ -87,7 +91,8 @@ class ColoInitContext(InsertPostInitMethodToModuleSubClasses):
             # detaching tensor is necessary for optimizers.
             requires_grad = param.requires_grad
             # TODO(jiaruifang) we initialize a Default PG memory
-            colo_param = ColoParameter(param.to(self._device), requires_grad=requires_grad)
+            colo_param = ColoParameter(param.to(device=self._device, dtype=self._dtype),
+                                       requires_grad=requires_grad)
             # add mapping record
             replaced_tensors[param] = colo_param
             delattr(submodule, param_name)
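
For reference, a minimal usage sketch of the new dtype argument follows. The import path is an assumption based on where ColoInitContext usually lives in the repository, and nn.Linear stands in for a real model.

# Minimal usage sketch (import path is assumed, not confirmed by this diff).
import torch
import torch.nn as nn

from colossalai.utils.model.colo_init_context import ColoInitContext

# Materialize parameters directly on CPU in fp16 at construction time,
# instead of building them in fp32 and casting afterwards.
with ColoInitContext(device=torch.device('cpu'), dtype=torch.half):
    model = nn.Linear(1024, 1024)

# Per the hunk above, each parameter is converted with
# param.to(device=self._device, dtype=self._dtype), so it ends up in fp16.
assert next(model.parameters()).dtype == torch.half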