Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-02 17:46:42 +00:00
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
@@ -1,8 +1,8 @@
 import math
 import warnings
 
-from torch import Tensor
 import torch.nn as nn
+from torch import Tensor
 
 
 def zeros_():
@@ -23,7 +23,7 @@ def ones_():
     return initializer
 
 
-def uniform_(a: float = 0., b: float = 1.):
+def uniform_(a: float = 0.0, b: float = 1.0):
     r"""Return the initializer filling the input Tensor with values drawn from the uniform
     distribution :math:`\mathcal{U}(a, b)`.
 
@@ -38,7 +38,7 @@ def uniform_(a: float = 0., b: float = 1.):
     return initializer
 
 
-def normal_(mean: float = 0., std: float = 1.):
+def normal_(mean: float = 0.0, std: float = 1.0):
     r"""Return the initializer filling the input Tensor with values drawn from the normal distribution
 
     .. math::
@@ -47,7 +47,7 @@ def normal_(mean: float = 0., std: float = 1.):
     Args:
         mean (float): the mean of the normal distribution. Defaults 0.0.
         std (float): the standard deviation of the normal distribution. Defaults 1.0.
-    """
+    """
 
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
         return nn.init.normal_(tensor, mean, std)
@@ -55,7 +55,7 @@ def normal_(mean: float = 0., std: float = 1.):
     return initializer
 
 
-def trunc_normal_(mean: float = 0., std: float = 1., a: float = -2., b: float = 2.):
+def trunc_normal_(mean: float = 0.0, std: float = 1.0, a: float = -2.0, b: float = 2.0):
     r"""Return the initializer filling the input Tensor with values drawn from a truncated
     normal distribution. The values are effectively drawn from the
     normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
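Every hunk below the imports touches the same initializer-factory pattern: each public function returns a closure initializer(tensor, fan_in=None, fan_out=None) that fills the tensor in place through torch.nn.init. A minimal, self-contained sketch of the pattern as it appears in the normal_ hunk above; the tensor shape here is illustrative:

import torch
import torch.nn as nn
from torch import Tensor


def normal_(mean: float = 0.0, std: float = 1.0):
    # Factory: returns a closure that fills a tensor in place.
    def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
        return nn.init.normal_(tensor, mean, std)

    return initializer


# Build the initializer once, then apply it to a parameter tensor.
weight = torch.empty(64, 128)  # illustrative shape
normal_(mean=0.0, std=0.02)(weight)  # filled in place via nn.init.normal_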
@@ -76,7 +76,7 @@ def trunc_normal_(mean: float = 0., std: float = 1., a: float = -2., b: float =
     return initializer
 
 
-def kaiming_uniform_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
+def kaiming_uniform_(a=0, mode="fan_in", nonlinearity="leaky_relu"):
     r"""Return the initializer filling the input `Tensor` with values according to the method
     described in `Delving deep into rectifiers: Surpassing human-level
     performance on ImageNet classification` - He, K. et al. (2015), using a
@@ -104,23 +104,23 @@ def kaiming_uniform_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
             warnings.warn("Initializing zero-element tensors is a no-op")
             return tensor
 
-        if mode == 'fan_in':
-            assert fan_in is not None, 'Fan_in is not provided.'
+        if mode == "fan_in":
+            assert fan_in is not None, "Fan_in is not provided."
             fan = fan_in
-        elif mode == 'fan_out':
-            assert fan_out is not None, 'Fan_out is not provided.'
+        elif mode == "fan_out":
+            assert fan_out is not None, "Fan_out is not provided."
             fan = fan_out
         else:
-            raise ValueError(f'Invalid initialization mode \'{mode}\'')
+            raise ValueError(f"Invalid initialization mode '{mode}'")
 
         std = nn.init.calculate_gain(nonlinearity, a) / math.sqrt(fan)
-        bound = math.sqrt(3.) * std
+        bound = math.sqrt(3.0) * std
         return nn.init.uniform_(tensor, -bound, bound)
 
     return initializer
 
 
-def kaiming_normal_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
+def kaiming_normal_(a=0, mode="fan_in", nonlinearity="leaky_relu"):
     r"""Return the initializer filling the input `Tensor` with values according to the method
     described in `Delving deep into rectifiers: Surpassing human-level
     performance on ImageNet classification` - He, K. et al. (2015), using a
@@ -148,14 +148,14 @@ def kaiming_normal_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
             warnings.warn("Initializing zero-element tensors is a no-op")
             return tensor
 
-        if mode == 'fan_in':
-            assert fan_in is not None, 'Fan_in is not provided.'
+        if mode == "fan_in":
+            assert fan_in is not None, "Fan_in is not provided."
             fan = fan_in
-        elif mode == 'fan_out':
-            assert fan_out is not None, 'Fan_out is not provided.'
+        elif mode == "fan_out":
+            assert fan_out is not None, "Fan_out is not provided."
             fan = fan_out
         else:
-            raise ValueError(f'Invalid initialization mode \'{mode}\'')
+            raise ValueError(f"Invalid initialization mode '{mode}'")
 
         std = nn.init.calculate_gain(nonlinearity, a) / math.sqrt(fan)
         return nn.init.normal_(tensor, 0, std)
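The kaiming_* hunks pick fan from fan_in or fan_out according to mode, then scale by the nonlinearity's gain. A short sketch, with a hypothetical fan_in of 256 and the default leaky_relu (a=0), reproducing the visible bound computation:

import math

import torch
import torch.nn as nn

fan_in = 256
gain = nn.init.calculate_gain("leaky_relu", 0)  # sqrt(2) when a=0
std = gain / math.sqrt(fan_in)
bound = math.sqrt(3.0) * std  # U(-bound, bound) has standard deviation std
tensor = torch.empty(128, 256)
nn.init.uniform_(tensor, -bound, bound)
print(f"target std={std:.4f}, sample std={tensor.std():.4f}")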
@@ -163,7 +163,7 @@ def kaiming_normal_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
     return initializer
 
 
-def xavier_uniform_(a: float = math.sqrt(3.), scale: float = 2., gain: float = 1.):
+def xavier_uniform_(a: float = math.sqrt(3.0), scale: float = 2.0, gain: float = 1.0):
     r"""Return the initializer filling the input `Tensor` with values according to the method
     described in `Understanding the difficulty of training deep feedforward
     neural networks` - Glorot, X. & Bengio, Y. (2010), using a uniform
@@ -184,7 +184,7 @@ def xavier_uniform_(a: float = math.sqrt(3.), scale: float = 2., gain: float = 1
 
     # adapted from torch.nn.init
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
-        assert fan_in is not None, 'Fan_in is not provided.'
+        assert fan_in is not None, "Fan_in is not provided."
 
         fan = fan_in
         if fan_out is not None:
@@ -197,7 +197,7 @@ def xavier_uniform_(a: float = math.sqrt(3.), scale: float = 2., gain: float = 1
     return initializer
 
 
-def xavier_normal_(scale: float = 2., gain: float = 1.):
+def xavier_normal_(scale: float = 2.0, gain: float = 1.0):
     r"""Return the initializer filling the input `Tensor` with values according to the method
     described in `Understanding the difficulty of training deep feedforward
     neural networks` - Glorot, X. & Bengio, Y. (2010), using a normal
@@ -216,7 +216,7 @@ def xavier_normal_(scale: float = 2., gain: float = 1.):
 
     # adapted from torch.nn.init
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
-        assert fan_in is not None, 'Fan_in is not provided.'
+        assert fan_in is not None, "Fan_in is not provided."
 
         fan = fan_in
         if fan_out is not None:
@@ -224,7 +224,7 @@ def xavier_normal_(scale: float = 2., gain: float = 1.):
 
         std = gain * math.sqrt(scale / float(fan))
 
-        return nn.init.normal_(tensor, 0., std)
+        return nn.init.normal_(tensor, 0.0, std)
 
     return initializer
 
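The xavier_* hunks share the formula std = gain * math.sqrt(scale / float(fan)). A quick sketch with illustrative fan values; note that folding fan_out into fan is an assumption here, since the hunks show only the if fan_out is not None: guard:

import math

gain, scale = 1.0, 2.0  # the defaults in the signatures above
fan_in, fan_out = 256, 128
fan = fan_in + fan_out  # assumed combination when fan_out is provided
std = gain * math.sqrt(scale / float(fan))
print(f"std = {std:.4f}")  # sqrt(2 / 384), about 0.0722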
@@ -232,7 +232,7 @@ def xavier_normal_(scale: float = 2., gain: float = 1.):
 def lecun_uniform_():
     # adapted from jax.nn.initializers
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
-        assert fan_in is not None, 'Fan_in is not provided.'
+        assert fan_in is not None, "Fan_in is not provided."
 
         var = 1.0 / fan_in
         bound = math.sqrt(3 * var)
@@ -244,9 +244,9 @@ def lecun_uniform_():
 def lecun_normal_():
     # adapted from jax.nn.initializers
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
-        assert fan_in is not None, 'Fan_in is not provided.'
+        assert fan_in is not None, "Fan_in is not provided."
 
         std = math.sqrt(1.0 / fan_in)
-        return nn.init.trunc_normal_(tensor, std=std / .87962566103423978)
+        return nn.init.trunc_normal_(tensor, std=std / 0.87962566103423978)
 
     return initializer
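The final hunk rewrites the bare literal .87962566103423978 as 0.87962566103423978. That constant is the standard deviation of a unit normal truncated to [-2, 2]; in jax.nn.initializers, from which lecun_normal_ is adapted, dividing the target std sqrt(1.0 / fan_in) by it compensates for the variance lost to the truncation. A numerical check of the constant itself, using the default (a, b) window of nn.init.trunc_normal_:

import torch
import torch.nn as nn

t = torch.empty(10_000_000)
nn.init.trunc_normal_(t, mean=0.0, std=1.0, a=-2.0, b=2.0)
print(f"sample std = {t.std():.6f}")  # approximately 0.879626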