Mirror of https://github.com/hpcaitech/ColossalAI.git
[refactor] move process group from _DistSpec to ColoTensor. (#1203)
@@ -1,6 +1,6 @@
 from .utils import InsertPostInitMethodToModuleSubClasses
 import torch
-from colossalai.tensor import ColoTensor, ColoParameter, distspec, TensorSpec
+from colossalai.tensor import ColoTensor, ColoParameter, distspec
 
 from colossalai.nn.parallel.layers import register_colo_module, \
     ColoLinear, ColoEmbedding
@@ -36,16 +36,17 @@ def ColoModulize(module):
 
 def colo_state_dict(self, destination=None, prefix='', keep_vars=False, state_dict_func=None):
     # build param to spec mapping
-    mapping = dict()
-
+    mapping1 = dict()
+    mapping2 = dict()
     # gather all params
     has_dist_parameter = False
     with torch.no_grad():
         for param in self.parameters():
             if isinstance(param, ColoParameter) and param.has_compute_spec():
                 has_dist_parameter = True
-                mapping[id(param)] = copy(param.tensor_spec)
-                param.set_tensor_spec(TensorSpec(distspec.replicate()))
+                mapping1[id(param)] = copy(param.dist_spec)
+                mapping2[id(param)] = copy(param.compute_spec)
+                param.set_dist_spec(distspec.replicate())
 
     # TODO: fix when keep_vars = True
     # when keep_vars = False, the state_dict_func will call detach to create
@@ -60,9 +61,10 @@ def colo_state_dict(self, destination=None, prefix='', keep_vars=False, state_di
     with torch.no_grad():
         for param in self.parameters():
             param_id = id(param)
-            if param_id in mapping:
-                spec = mapping[id(param)]
-                param.set_tensor_spec(spec)
+            if param_id in mapping1:
+                dist_spec = mapping1[id(param)]
+                compute_spec = mapping2[id(param)]
+                param.set_tensor_spec(dist_spec, compute_spec)
     return ret
 
 
@@ -122,7 +124,7 @@ class ColoInitContext(InsertPostInitMethodToModuleSubClasses):
         save_torch_payload = True if not self._lazy_memory_allocate else False
         # detaching tensor is necessary for optimizers.
         requires_grad = param.requires_grad
-
+        # TODO(jiaruifang) we initialize a Default PG memory
         colo_param = ColoParameter(param.to(self._device), requires_grad=requires_grad)
         # add mapping record
         replaced_tensors[param] = colo_param
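A minimal sketch of the save / replicate / restore pattern that colo_state_dict follows after this refactor. The helper names below are illustrative and not part of the commit; the accessors (dist_spec, compute_spec, set_dist_spec, set_tensor_spec, has_compute_spec, distspec.replicate) and their signatures are assumed to behave as they appear in the hunks above for this version of colossalai.tensor.

# Sketch only: helper names are hypothetical, API usage mirrors the diff above.
from copy import copy

import torch

from colossalai.tensor import ColoParameter, distspec


def replicate_dist_params(module: torch.nn.Module):
    """Gather every distributed ColoParameter and remember its original specs."""
    dist_specs, compute_specs = {}, {}
    with torch.no_grad():
        for param in module.parameters():
            if isinstance(param, ColoParameter) and param.has_compute_spec():
                dist_specs[id(param)] = copy(param.dist_spec)
                compute_specs[id(param)] = copy(param.compute_spec)
                param.set_dist_spec(distspec.replicate())  # gather shards into a replicated layout
    return dist_specs, compute_specs


def restore_dist_params(module: torch.nn.Module, dist_specs, compute_specs):
    """Put the saved dist/compute specs back after state_dict() has been taken."""
    with torch.no_grad():
        for param in module.parameters():
            if id(param) in dist_specs:
                param.set_tensor_spec(dist_specs[id(param)], compute_specs[id(param)])

A caller would take torch.nn.Module.state_dict() between the two helpers, which is what colo_state_dict does with its mapping1/mapping2 dictionaries: replicate before serialization, then restore each parameter's distribution and compute specs afterwards.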