Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-02 17:46:42 +00:00
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
@@ -1,8 +1,8 @@
 import math
 import warnings
 
-from torch import Tensor
 import torch.nn as nn
+from torch import Tensor
 
 
 def zeros_():
@@ -23,7 +23,7 @@ def ones_():
     return initializer
 
 
-def uniform_(a: float = 0., b: float = 1.):
+def uniform_(a: float = 0.0, b: float = 1.0):
     r"""Return the initializer filling the input Tensor with values drawn from the uniform
     distribution :math:`\mathcal{U}(a, b)`.
 
@@ -38,7 +38,7 @@ def uniform_(a: float = 0., b: float = 1.):
     return initializer
 
 
-def normal_(mean: float = 0., std: float = 1.):
+def normal_(mean: float = 0.0, std: float = 1.0):
     r"""Return the initializer filling the input Tensor with values drawn from the normal distribution
 
     .. math::
@@ -47,7 +47,7 @@ def normal_(mean: float = 0., std: float = 1.):
     Args:
         mean (float): the mean of the normal distribution. Defaults 0.0.
         std (float): the standard deviation of the normal distribution. Defaults 1.0.
-    """
+    """
 
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
         return nn.init.normal_(tensor, mean, std)
@@ -55,7 +55,7 @@ def normal_(mean: float = 0., std: float = 1.):
     return initializer
 
 
-def trunc_normal_(mean: float = 0., std: float = 1., a: float = -2., b: float = 2.):
+def trunc_normal_(mean: float = 0.0, std: float = 1.0, a: float = -2.0, b: float = 2.0):
     r"""Return the initializer filling the input Tensor with values drawn from a truncated
     normal distribution. The values are effectively drawn from the
     normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
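Every hunk below the imports touches the same initializer-factory pattern: each public function returns a closure initializer(tensor, fan_in=None, fan_out=None) that fills the tensor in place through torch.nn.init. A minimal, self-contained sketch of the pattern as it appears in the normal_ hunk above; the tensor shape here is illustrative:

import torch
import torch.nn as nn
from torch import Tensor


def normal_(mean: float = 0.0, std: float = 1.0):
    # Factory: returns a closure that fills a tensor in place.
    def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
        return nn.init.normal_(tensor, mean, std)

    return initializer


# Build the initializer once, then apply it to a parameter tensor.
weight = torch.empty(64, 128)  # illustrative shape
normal_(mean=0.0, std=0.02)(weight)  # filled in place via nn.init.normal_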
@@ -76,7 +76,7 @@ def trunc_normal_(mean: float = 0., std: float = 1., a: float = -2., b: float =
     return initializer
 
 
-def kaiming_uniform_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
+def kaiming_uniform_(a=0, mode="fan_in", nonlinearity="leaky_relu"):
     r"""Return the initializer filling the input `Tensor` with values according to the method
     described in `Delving deep into rectifiers: Surpassing human-level
     performance on ImageNet classification` - He, K. et al. (2015), using a
@@ -104,23 +104,23 @@ def kaiming_uniform_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
             warnings.warn("Initializing zero-element tensors is a no-op")
             return tensor
 
-        if mode == 'fan_in':
-            assert fan_in is not None, 'Fan_in is not provided.'
+        if mode == "fan_in":
+            assert fan_in is not None, "Fan_in is not provided."
             fan = fan_in
-        elif mode == 'fan_out':
-            assert fan_out is not None, 'Fan_out is not provided.'
+        elif mode == "fan_out":
+            assert fan_out is not None, "Fan_out is not provided."
             fan = fan_out
         else:
-            raise ValueError(f'Invalid initialization mode \'{mode}\'')
+            raise ValueError(f"Invalid initialization mode '{mode}'")
 
         std = nn.init.calculate_gain(nonlinearity, a) / math.sqrt(fan)
-        bound = math.sqrt(3.) * std
+        bound = math.sqrt(3.0) * std
         return nn.init.uniform_(tensor, -bound, bound)
 
     return initializer
 
 
-def kaiming_normal_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
+def kaiming_normal_(a=0, mode="fan_in", nonlinearity="leaky_relu"):
     r"""Return the initializer filling the input `Tensor` with values according to the method
     described in `Delving deep into rectifiers: Surpassing human-level
     performance on ImageNet classification` - He, K. et al. (2015), using a
@@ -148,14 +148,14 @@ def kaiming_normal_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
             warnings.warn("Initializing zero-element tensors is a no-op")
             return tensor
 
-        if mode == 'fan_in':
-            assert fan_in is not None, 'Fan_in is not provided.'
+        if mode == "fan_in":
+            assert fan_in is not None, "Fan_in is not provided."
             fan = fan_in
-        elif mode == 'fan_out':
-            assert fan_out is not None, 'Fan_out is not provided.'
+        elif mode == "fan_out":
+            assert fan_out is not None, "Fan_out is not provided."
             fan = fan_out
         else:
-            raise ValueError(f'Invalid initialization mode \'{mode}\'')
+            raise ValueError(f"Invalid initialization mode '{mode}'")
 
         std = nn.init.calculate_gain(nonlinearity, a) / math.sqrt(fan)
         return nn.init.normal_(tensor, 0, std)
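The kaiming_* hunks pick fan from fan_in or fan_out according to mode, then scale by the nonlinearity's gain. A short sketch, with a hypothetical fan_in of 256 and the default leaky_relu (a=0), reproducing the visible bound computation:

import math

import torch
import torch.nn as nn

fan_in = 256
gain = nn.init.calculate_gain("leaky_relu", 0)  # sqrt(2) when a=0
std = gain / math.sqrt(fan_in)
bound = math.sqrt(3.0) * std  # U(-bound, bound) has standard deviation std
tensor = torch.empty(128, 256)
nn.init.uniform_(tensor, -bound, bound)
print(f"target std={std:.4f}, sample std={tensor.std():.4f}")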
@@ -163,7 +163,7 @@ def kaiming_normal_(a=0, mode='fan_in', nonlinearity='leaky_relu'):
     return initializer
 
 
-def xavier_uniform_(a: float = math.sqrt(3.), scale: float = 2., gain: float = 1.):
+def xavier_uniform_(a: float = math.sqrt(3.0), scale: float = 2.0, gain: float = 1.0):
     r"""Return the initializer filling the input `Tensor` with values according to the method
     described in `Understanding the difficulty of training deep feedforward
     neural networks` - Glorot, X. & Bengio, Y. (2010), using a uniform
@@ -184,7 +184,7 @@ def xavier_uniform_(a: float = math.sqrt(3.), scale: float = 2., gain: float = 1
 
     # adapted from torch.nn.init
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
-        assert fan_in is not None, 'Fan_in is not provided.'
+        assert fan_in is not None, "Fan_in is not provided."
 
         fan = fan_in
         if fan_out is not None:
@@ -197,7 +197,7 @@ def xavier_uniform_(a: float = math.sqrt(3.), scale: float = 2., gain: float = 1
     return initializer
 
 
-def xavier_normal_(scale: float = 2., gain: float = 1.):
+def xavier_normal_(scale: float = 2.0, gain: float = 1.0):
     r"""Return the initializer filling the input `Tensor` with values according to the method
     described in `Understanding the difficulty of training deep feedforward
     neural networks` - Glorot, X. & Bengio, Y. (2010), using a normal
@@ -216,7 +216,7 @@ def xavier_normal_(scale: float = 2., gain: float = 1.):
 
     # adapted from torch.nn.init
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
-        assert fan_in is not None, 'Fan_in is not provided.'
+        assert fan_in is not None, "Fan_in is not provided."
 
         fan = fan_in
         if fan_out is not None:
@@ -224,7 +224,7 @@ def xavier_normal_(scale: float = 2., gain: float = 1.):
 
         std = gain * math.sqrt(scale / float(fan))
 
-        return nn.init.normal_(tensor, 0., std)
+        return nn.init.normal_(tensor, 0.0, std)
 
     return initializer
 
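The xavier_* hunks share the formula std = gain * math.sqrt(scale / float(fan)). A quick sketch with illustrative fan values; note that folding fan_out into fan is an assumption here, since the hunks show only the if fan_out is not None: guard:

import math

gain, scale = 1.0, 2.0  # the defaults in the signatures above
fan_in, fan_out = 256, 128
fan = fan_in + fan_out  # assumed combination when fan_out is provided
std = gain * math.sqrt(scale / float(fan))
print(f"std = {std:.4f}")  # sqrt(2 / 384), about 0.0722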
@@ -232,7 +232,7 @@ def xavier_normal_(scale: float = 2., gain: float = 1.):
 def lecun_uniform_():
     # adapted from jax.nn.initializers
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
-        assert fan_in is not None, 'Fan_in is not provided.'
+        assert fan_in is not None, "Fan_in is not provided."
 
         var = 1.0 / fan_in
         bound = math.sqrt(3 * var)
@@ -244,9 +244,9 @@ def lecun_uniform_():
 def lecun_normal_():
     # adapted from jax.nn.initializers
     def initializer(tensor: Tensor, fan_in: int = None, fan_out: int = None):
-        assert fan_in is not None, 'Fan_in is not provided.'
+        assert fan_in is not None, "Fan_in is not provided."
 
         std = math.sqrt(1.0 / fan_in)
-        return nn.init.trunc_normal_(tensor, std=std / .87962566103423978)
+        return nn.init.trunc_normal_(tensor, std=std / 0.87962566103423978)
 
     return initializer
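The final hunk rewrites the bare literal .87962566103423978 as 0.87962566103423978. That constant is the standard deviation of a unit normal truncated to [-2, 2]; in jax.nn.initializers, from which lecun_normal_ is adapted, dividing the target std sqrt(1.0 / fan_in) by it compensates for the variance lost to the truncation. A numerical check of the constant itself, using the default (a, b) window of nn.init.trunc_normal_:

import torch
import torch.nn as nn

t = torch.empty(10_000_000)
nn.init.trunc_normal_(t, mean=0.0, std=1.0, a=-2.0, b=2.0)
print(f"sample std = {t.std():.6f}")  # approximately 0.879626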