[utils] fixed lazy init context (#1867)
@@ -1,23 +1,24 @@
 #!/usr/bin/env python
 # coding: utf-8
 
-import types
-import inspect
-from typing import List, Callable
-from colossalai.tensor import ColoParameter, ColoTensor
+import inspect
+import types
+from typing import Callable, List
+
+import torch
+import torch.nn as nn
+
+from colossalai.tensor import ColoParameter, ColoTensor
 from colossalai.utils.model.utils import substitute_init_recursively
 
 
 class LazyInitContext():
     """
     A context to allow for lazy weight initialization of PyTorch modules. It intercepts the tensor
     initialization functions for lazy initialization
 
     Note:
         This API is only experimental and subject to future changes.
 
     Usage:
         with LazyInitContext() as ctx:
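A note for readers: the "lazy weight initialization" described in the docstring above is built on PyTorch meta tensors, which carry shape and dtype but no storage. A minimal plain-PyTorch sketch, independent of this repo:

import torch

# Meta tensors record shape/dtype only; no memory is allocated,
# so parameter "creation" is free and can be replayed later.
t = torch.empty(4, 4, device='meta')
assert t.is_meta

# Materialization allocates real (uninitialized) storage of the same
# shape/dtype, mirroring the torch.empty_like call later in this file.
real = torch.empty_like(t, device='cpu')
assert not real.is_meta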
@@ -30,19 +31,20 @@ class LazyInitContext():
         # initialize weights
         ctx.lazy_init_parameters(model)
 
         # make sure the weight is not a meta tensor
         # and initialized correctly
         assert not model.weight.is_meta and torch.all(model.weight == 0)
 
     Args:
-        to_meta (bool): optional, whether to initialize the model with meta tensors, default is False.
+        to_meta (bool): optional, whether to initialize the model with meta tensors, default is True. This
+            argument exists for now because some corner cases such as self.weight = torch.zeros(...) cannot be captured yet.
         extra_torch_tensor_func (List[str]): extra torch tensor functions related
             to value setting, such as `zero_` and `triu_`. `zero_` is pre-added by default.
     """
 
     tensor_set_value_func = ['zero_', 'fill_']
 
-    def __init__(self, to_meta: bool = False, extra_torch_tensor_func: List[str] = None):
+    def __init__(self, to_meta: bool = True, extra_torch_tensor_func: List[str] = None):
         # TODO: hijack the torch constructor functions as well
         self._to_meta = to_meta
         self._intercepted_nn_init_func_cache = {}
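Pieced together from the docstring's Usage block, a runnable sketch of the intended flow. The hunk elides the model-construction lines, so the nn.Linear example and the import path colossalai.utils.model.lazy_init_context are assumptions inferred from the surrounding diff, not confirmed by it:

import torch
import torch.nn as nn

# import path assumed from the sibling import in this diff
from colossalai.utils.model.lazy_init_context import LazyInitContext

with LazyInitContext() as ctx:
    # construction is intercepted; weights stay lazy (example module assumed)
    model = nn.Linear(10, 10)
    model.weight.zero_()  # value-setting call is recorded, not executed

# materialize and initialize the recorded parameters
ctx.lazy_init_parameters(model)

# make sure the weight is not a meta tensor and initialized correctly
assert not model.weight.is_meta and torch.all(model.weight == 0)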
@@ -212,18 +214,19 @@ class LazyInitContext():
             materialized_tensor = torch.empty_like(tensor, device=device)
             # if this tensor is a meta tensor, it must have an init function
             assert tensor in self._intercepted_nn_init_func_cache
-            tensor = materialized_tensor
         else:
             materialized_tensor = tensor
 
         # apply init function
         if tensor in self._intercepted_nn_init_func_cache:
             init_func, args, kwargs = self._intercepted_nn_init_func_cache[tensor][-1]
-            init_func(tensor, *args, **kwargs)
+            init_func(materialized_tensor, *args, **kwargs)
 
         # convert it to ColoTensor or ColoParameter
         if is_param:
-            tensor = ColoParameter.from_torch_tensor(tensor, requires_grad=tensor.requires_grad)
+            tensor = ColoParameter.from_torch_tensor(materialized_tensor, requires_grad=tensor.requires_grad)
         else:
-            tensor = ColoTensor.from_torch_tensor(tensor)
+            tensor = ColoTensor.from_torch_tensor(materialized_tensor)
 
         # override the original tensor
         with torch.no_grad():
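Why the materialized_tensor change matters: the init-function cache is keyed by the original meta tensor (torch.Tensor hashes by object identity), so the old `tensor = materialized_tensor` rebinding made the subsequent cache lookup miss, the init function never ran, and the weights kept the garbage values from torch.empty_like. A toy reproduction with an invented cache dict, independent of the repo:

import torch

# toy stand-in for self._intercepted_nn_init_func_cache (name invented)
init_cache = {}

meta_weight = torch.empty(2, 2, device='meta')   # placeholder, no storage
init_cache[meta_weight] = torch.nn.init.zeros_   # record the init function

materialized = torch.empty_like(meta_weight, device='cpu')  # real, garbage-filled

# pre-fix pattern: rebinding drops the cache key, so the lookup misses
tensor = materialized
assert tensor not in init_cache  # the init function would never run

# post-fix pattern: look up with the meta tensor, apply to the materialized one
if meta_weight in init_cache:
    init_cache[meta_weight](materialized)
assert torch.all(materialized == 0)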