[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
2025-09-01 17:17:05 +00:00 · 2023-09-19 14:20:26 +08:00
parent 3c6b831c26
commit 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions
--- a/tests/test_analyzer/test_fx/test_bias_addition.py
+++ b/tests/test_analyzer/test_fx/test_bias_addition.py
@@ -12,7 +12,6 @@ except:


 class LinearModel(torch.nn.Module):
-
    def __init__(self, in_features, out_features, bias):
        super().__init__()
        self.linear = torch.nn.Linear(in_features, out_features, bias=bias)
@@ -23,25 +22,14 @@ class LinearModel(torch.nn.Module):


 class ConvModel(torch.nn.Module):
-
    def __init__(self, in_channel, out_channels, kernel_size, bias) -> None:
        super().__init__()
-        self.conv = torch.nn.Conv2d(in_channel,
-                                    out_channels,
-                                    kernel_size,
-                                    bias=bias,
-                                    padding=1,
-                                    stride=2,
-                                    dilation=2,
-                                    groups=3)
-        self.conv_transpose = torch.nn.ConvTranspose2d(in_channel,
-                                                       out_channels,
-                                                       kernel_size,
-                                                       bias=bias,
-                                                       padding=1,
-                                                       stride=2,
-                                                       dilation=2,
-                                                       groups=3)
+        self.conv = torch.nn.Conv2d(
+            in_channel, out_channels, kernel_size, bias=bias, padding=1, stride=2, dilation=2, groups=3
+        )
+        self.conv_transpose = torch.nn.ConvTranspose2d(
+            in_channel, out_channels, kernel_size, bias=bias, padding=1, stride=2, dilation=2, groups=3
+        )

    def forward(self, x, select=0):
        if select == 0:
@@ -52,7 +40,6 @@ class ConvModel(torch.nn.Module):


 class SiuModel(torch.nn.Module):
-
    def __init__(self, bias) -> None:
        super().__init__()
        self.linear = LinearModel(3, 3, bias)
@@ -69,7 +56,6 @@ class SiuModel(torch.nn.Module):


 class AddmmModel(torch.nn.Module):
-
    def __init__(self, alpha, beta) -> None:
        super().__init__()
        self.alpha = alpha
@@ -80,7 +66,7 @@ class AddmmModel(torch.nn.Module):
        return x


-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
+@pytest.mark.skipif(version.parse(torch.__version__) < version.parse("1.12.0"), reason="torch version < 12")
@clear_cache_before_run()
@parameterize("bias", [True, False])
@parameterize("bias_addition_split", [True, False])
@@ -89,19 +75,21 @@ class AddmmModel(torch.nn.Module):
 def test_siu_model(bias, bias_addition_split, shape, select):
    model = SiuModel(bias=bias)
    x = torch.rand(shape)
-    gm = symbolic_trace(model,
-                        meta_args={'x': x},
-                        concrete_args={'select': select},
-                        trace_act_ckpt=True,
-                        bias_addition_split=bias_addition_split)
-    assert torch.allclose(model(x, select), gm(x)), 'original model and traced model should be the same!'
+    gm = symbolic_trace(
+        model,
+        meta_args={"x": x},
+        concrete_args={"select": select},
+        trace_act_ckpt=True,
+        bias_addition_split=bias_addition_split,
+    )
+    assert torch.allclose(model(x, select), gm(x)), "original model and traced model should be the same!"
    if bias and bias_addition_split:
-        assert '+' in gm.code, 'bias addition should be split!'
+        assert "+" in gm.code, "bias addition should be split!"
    else:
-        assert '+' not in gm.code, 'bias addition should not be split!'
+        assert "+" not in gm.code, "bias addition should not be split!"


-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
+@pytest.mark.skipif(version.parse(torch.__version__) < version.parse("1.12.0"), reason="torch version < 12")
@parameterize("alpha", [1, 2])
@parameterize("beta", [1, 2])
@parameterize("bias_addition_split", [True, False])
@@ -109,14 +97,14 @@ def test_siu_model(bias, bias_addition_split, shape, select):
 def test_addmm_model(alpha, beta, bias_addition_split, shape):
    model = AddmmModel(alpha=alpha, beta=beta)
    x = torch.rand(shape)
-    gm = symbolic_trace(model, meta_args={'x': x}, trace_act_ckpt=True, bias_addition_split=bias_addition_split)
-    assert torch.allclose(model(x), gm(x)), 'original model and traced model should be the same!'
+    gm = symbolic_trace(model, meta_args={"x": x}, trace_act_ckpt=True, bias_addition_split=bias_addition_split)
+    assert torch.allclose(model(x), gm(x)), "original model and traced model should be the same!"
    if (alpha == 1 and beta == 1) or not bias_addition_split:
-        assert '*' not in gm.code, 'bias addition should not be split!'
+        assert "*" not in gm.code, "bias addition should not be split!"
    elif bias_addition_split:
-        assert '+' in gm.code, 'bias addition should be split!'
+        assert "+" in gm.code, "bias addition should be split!"


-if __name__ == '__main__':
+if __name__ == "__main__":
    test_siu_model()
    test_addmm_model()
--- a/tests/test_analyzer/test_fx/test_mod_dir.py
+++ b/tests/test_analyzer/test_fx/test_mod_dir.py
@@ -10,7 +10,6 @@ except:


 class LinearModel(torch.nn.Module):
-
    def __init__(self, in_features, out_features, bias):
        super().__init__()
        self.linear = torch.nn.Linear(in_features, out_features, bias=bias)
@@ -21,25 +20,14 @@ class LinearModel(torch.nn.Module):


 class ConvModel(torch.nn.Module):
-
    def __init__(self, in_channel, out_channels, kernel_size, bias) -> None:
        super().__init__()
-        self.conv = torch.nn.Conv2d(in_channel,
-                                    out_channels,
-                                    kernel_size,
-                                    bias=bias,
-                                    padding=1,
-                                    stride=2,
-                                    dilation=2,
-                                    groups=3)
-        self.conv_transpose = torch.nn.ConvTranspose2d(out_channels,
-                                                       out_channels,
-                                                       kernel_size,
-                                                       bias=bias,
-                                                       padding=1,
-                                                       stride=2,
-                                                       dilation=2,
-                                                       groups=3)
+        self.conv = torch.nn.Conv2d(
+            in_channel, out_channels, kernel_size, bias=bias, padding=1, stride=2, dilation=2, groups=3
+        )
+        self.conv_transpose = torch.nn.ConvTranspose2d(
+            out_channels, out_channels, kernel_size, bias=bias, padding=1, stride=2, dilation=2, groups=3
+        )

    def forward(self, x):
        x = self.conv(x)
@@ -48,7 +36,6 @@ class ConvModel(torch.nn.Module):


 class AModel(torch.nn.Module):
-
    def __init__(self, bias) -> None:
        super().__init__()
        self.linear_1 = LinearModel(3, 3, bias)
@@ -63,7 +50,7 @@ class AModel(torch.nn.Module):
        return x


-@pytest.mark.skipif(torch.__version__ < '1.12.0', reason='torch version < 12')
+@pytest.mark.skipif(torch.__version__ < "1.12.0", reason="torch version < 12")
@clear_cache_before_run()
@parameterize("bias", [True, False])
@parameterize("bias_addition_split", [True, False])
@@ -71,11 +58,11 @@ class AModel(torch.nn.Module):
 def test_mod_dir(bias, bias_addition_split, shape):
    model = AModel(bias=bias)
    x = torch.rand(shape)
-    gm = symbolic_trace(model, meta_args={'x': x}, bias_addition_split=bias_addition_split)
+    gm = symbolic_trace(model, meta_args={"x": x}, bias_addition_split=bias_addition_split)
    for node in gm.graph.nodes:
-        assert len(node.meta['info'].mod_dir), f"{node} should have non-trivial ``mod_dir``."
-        print(node, node.meta['info'].mod_dir)
+        assert len(node.meta["info"].mod_dir), f"{node} should have non-trivial ``mod_dir``."
+        print(node, node.meta["info"].mod_dir)


-if __name__ == '__main__':
+if __name__ == "__main__":
    test_mod_dir(bias=True, bias_addition_split=True, shape=(3, 3, 3))
--- a/tests/test_analyzer/test_fx/test_nested_ckpt.py
+++ b/tests/test_analyzer/test_fx/test_nested_ckpt.py
@@ -12,7 +12,6 @@ except:


 class MyModule(nn.Module):
-
    def __init__(self):
        super().__init__()
        self.a = nn.Linear(10, 10)
@@ -43,14 +42,14 @@ class MyModule(nn.Module):
        return checkpoint(self.checkpoint_0, x)


-@pytest.mark.skipif(torch.__version__ < '1.12.0', reason='torch version < 12')
+@pytest.mark.skipif(torch.__version__ < "1.12.0", reason="torch version < 12")
@clear_cache_before_run()
 def test_nested_ckpt():
    model = MyModule()
    x = torch.rand(10, 10)
-    gm = symbolic_trace(model, meta_args={'x': x}, trace_act_ckpt=True)
+    gm = symbolic_trace(model, meta_args={"x": x}, trace_act_ckpt=True)
    assert torch.allclose(gm(x), model(x)), "The traced model should generate the same output as the original model."
-    for ckpt_def in filter(lambda s: s.startswith('checkpoint'), dir(model)):
+    for ckpt_def in filter(lambda s: s.startswith("checkpoint"), dir(model)):
        assert ckpt_def in gm.code, f"Checkpoint {ckpt_def} should be in the traced code.\n Traced code = {gm.code}"


--- a/tests/test_analyzer/test_fx/test_shape_prop.py
+++ b/tests/test_analyzer/test_fx/test_shape_prop.py
@@ -1,6 +1,5 @@
 import pytest
 import torch
-import torchvision.models as tm
 from packaging import version

 from colossalai.testing.utils import clear_cache_before_run, parameterize
@@ -16,24 +15,25 @@ try:
    def linear_impl(*args, **kwargs):
        assert True
        return torch.nn.functional.linear(*args, **kwargs)
+
 except:
    pass


 def _check_gm_validity(gm: torch.fx.GraphModule):
    for node in gm.graph.nodes:
-        assert node.meta['info'].outputs, f'In {gm.__class__.__name__}, {node} has no output shape.'
+        assert node.meta["info"].outputs, f"In {gm.__class__.__name__}, {node} has no output shape."
        if node.op in [
-                'call_module',    # can apply to params
-                'call_function',    # can apply to params
-                'call_method',    # can apply to params
+            "call_module",  # can apply to params
+            "call_function",  # can apply to params
+            "call_method",  # can apply to params
        ]:
-            assert hasattr(node.meta['info'], 'inputs'), f'In {gm.__class__.__name__}, {node} has no input shape.'
+            assert hasattr(node.meta["info"], "inputs"), f"In {gm.__class__.__name__}, {node} has no input shape."


-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
+@pytest.mark.skipif(version.parse(torch.__version__) < version.parse("1.12.0"), reason="torch version < 12")
@clear_cache_before_run()
-@parameterize('m', tm_models)
+@parameterize("m", tm_models)
 def test_torchvision_shape_prop(m):
    with MetaTensorMode():
        model = m()
@@ -46,9 +46,9 @@ def test_torchvision_shape_prop(m):
    _check_gm_validity(gm)


-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
+@pytest.mark.skipif(version.parse(torch.__version__) < version.parse("1.12.0"), reason="torch version < 12")
@clear_cache_before_run()
-@parameterize('m', tmm_models)
+@parameterize("m", tmm_models)
 def test_timm_shape_prop(m):
    with MetaTensorMode():
        model = m()
--- a/tests/test_analyzer/test_fx/test_symbolic_profile.py
+++ b/tests/test_analyzer/test_fx/test_symbolic_profile.py
@@ -1,6 +1,5 @@
 import pytest
 import torch
-import torchvision.models as tm
 from packaging import version

 from colossalai.testing.utils import clear_cache_before_run, parameterize
@@ -15,12 +14,12 @@ except:

 def _check_gm_validity(gm: torch.fx.GraphModule):
    for node in gm.graph.nodes:
-        assert len(node.meta['info'].global_ctx), f'In {gm.__class__.__name__}, {node} has empty global context.'
+        assert len(node.meta["info"].global_ctx), f"In {gm.__class__.__name__}, {node} has empty global context."


-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
+@pytest.mark.skipif(version.parse(torch.__version__) < version.parse("1.12.0"), reason="torch version < 12")
@clear_cache_before_run()
-@parameterize('m', tm_models)
+@parameterize("m", tm_models)
 def test_torchvision_profile(m, verbose=False, bias_addition_split=False):
    with MetaTensorMode():
        model = m()
@@ -33,9 +32,9 @@ def test_torchvision_profile(m, verbose=False, bias_addition_split=False):
    _check_gm_validity(gm)


-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
+@pytest.mark.skipif(version.parse(torch.__version__) < version.parse("1.12.0"), reason="torch version < 12")
@clear_cache_before_run()
-@parameterize('m', tmm_models)
+@parameterize("m", tmm_models)
 def test_timm_profile(m, verbose=False, bias_addition_split=False):
    with MetaTensorMode():
        model = m()
--- a/tests/test_analyzer/test_subclasses/test_aten.py
+++ b/tests/test_analyzer/test_subclasses/test_aten.py
@@ -14,35 +14,41 @@ except:
 aten = torch.ops.aten

 registered_meta = {
-    ('aten.convolution.default', True): [    # (aten ops, requires_backward)
+    ("aten.convolution.default", True): [  # (aten ops, requires_backward)
        (nn.Conv1d(in_channels=3, out_channels=4, kernel_size=2, padding=1, dilation=2), torch.rand(2, 3, 4)),
        (nn.Conv2d(in_channels=3, out_channels=4, kernel_size=2, padding=1, dilation=2), torch.rand(2, 3, 4, 4)),
        (nn.Conv3d(in_channels=3, out_channels=4, kernel_size=2, padding=1, dilation=2), torch.rand(2, 3, 4, 4, 4)),
        (nn.ConvTranspose1d(in_channels=3, out_channels=4, kernel_size=2, padding=1, dilation=2), torch.rand(2, 3, 4)),
-        (nn.ConvTranspose2d(in_channels=3, out_channels=4, kernel_size=2, padding=1,
-                            dilation=2), torch.rand(2, 3, 4, 4)),
-        (nn.ConvTranspose3d(in_channels=3, out_channels=4, kernel_size=2, padding=1,
-                            dilation=2), torch.rand(2, 3, 4, 4, 4)),
+        (
+            nn.ConvTranspose2d(in_channels=3, out_channels=4, kernel_size=2, padding=1, dilation=2),
+            torch.rand(2, 3, 4, 4),
+        ),
+        (
+            nn.ConvTranspose3d(in_channels=3, out_channels=4, kernel_size=2, padding=1, dilation=2),
+            torch.rand(2, 3, 4, 4, 4),
+        ),
    ],
-    ('aten.native_batch_norm.default', True): [
+    ("aten.native_batch_norm.default", True): [
        (nn.BatchNorm1d(4), torch.rand(2, 4)),
        (nn.BatchNorm2d(4), torch.rand(1, 4, 4, 4)),
        (nn.BatchNorm3d(4), torch.rand(1, 4, 4, 4, 4)),
    ],
-    ('aten.native_layer_norm.default', True): [(nn.LayerNorm(4), torch.rand(1, 2, 3, 4)),],
-    ('aten.avg_pool1d.default', True): [
+    ("aten.native_layer_norm.default", True): [
+        (nn.LayerNorm(4), torch.rand(1, 2, 3, 4)),
+    ],
+    ("aten.avg_pool1d.default", True): [
        (nn.MaxPool1d(3, stride=2), torch.rand(4, 5, 5)),
        (nn.AvgPool1d(3, stride=2), torch.rand(4, 5, 5)),
        (nn.AdaptiveMaxPool1d(3), torch.rand(4, 5, 5)),
        (nn.AdaptiveAvgPool1d(3), torch.rand(4, 5, 5)),
    ],
-    ('aten.avg_pool2d.default', True): [
+    ("aten.avg_pool2d.default", True): [
        (nn.MaxPool2d((3, 2), stride=(2, 1)), torch.rand(2, 4, 5, 5)),
        (nn.AvgPool2d((3, 2), stride=(2, 1)), torch.rand(2, 4, 5, 5)),
        (nn.AdaptiveMaxPool2d((3, 2)), torch.rand(2, 4, 5, 5)),
        (nn.AdaptiveAvgPool2d((3, 2)), torch.rand(2, 4, 5, 5)),
    ],
-    ('aten.relu.default', True): [
+    ("aten.relu.default", True): [
        (nn.ReLU(), torch.rand(4, 3, 1, 2)),
        (nn.LeakyReLU(), torch.rand(4, 3, 1, 2)),
        (nn.SiLU(), torch.rand(4, 3, 1, 2)),
@@ -51,15 +57,20 @@ registered_meta = {
        (nn.Sigmoid(), torch.rand(4, 3, 1, 2)),
        (nn.Tanh(), torch.rand(4, 3, 1, 2)),
        (nn.Hardswish(), torch.rand(4, 3, 1, 2)),
-    ]
+    ],
 }


 def compare_all(tensor: torch.Tensor, meta_tensor: torch.Tensor) -> Any:
-    assert tensor.shape == meta_tensor.shape, f'the shape of tensor ({tensor.shape}) and meta tensor ({meta_tensor.shape}) does not match.'
-    assert tensor.dtype == meta_tensor.dtype, f'the dtype of tensor ({tensor.dtype}) and meta tensor ({meta_tensor.dtype}) does not match.'
-    assert tensor.stride() == meta_tensor.stride(
-    ), f'the stride of tensor ({tensor.stride()}) and meta tensor ({meta_tensor.stride()}) does not match.'
+    assert (
+        tensor.shape == meta_tensor.shape
+    ), f"the shape of tensor ({tensor.shape}) and meta tensor ({meta_tensor.shape}) does not match."
+    assert (
+        tensor.dtype == meta_tensor.dtype
+    ), f"the dtype of tensor ({tensor.dtype}) and meta tensor ({meta_tensor.dtype}) does not match."
+    assert (
+        tensor.stride() == meta_tensor.stride()
+    ), f"the stride of tensor ({tensor.stride()}) and meta tensor ({meta_tensor.stride()}) does not match."


 def run_and_compare(f: Union[nn.Module, Callable], x: torch.Tensor, requires_backward=False) -> Any:
@@ -73,7 +84,7 @@ def run_and_compare(f: Union[nn.Module, Callable], x: torch.Tensor, requires_bac
        compare_all(x.grad, meta_x.grad)


-@pytest.mark.skipif(torch.__version__ < '1.12.0', reason='torch version < 12')
+@pytest.mark.skipif(torch.__version__ < "1.12.0", reason="torch version < 12")
@clear_cache_before_run()
 def test_meta_aten():
    for (aten_op, requires_backward), v in registered_meta.items():
@@ -81,5 +92,5 @@ def test_meta_aten():
            run_and_compare(f, x, requires_backward)


-if __name__ == '__main__':
+if __name__ == "__main__":
    test_meta_aten()
--- a/tests/test_analyzer/test_subclasses/test_flop_tensor.py
+++ b/tests/test_analyzer/test_subclasses/test_flop_tensor.py
@@ -4,7 +4,6 @@ import torch.nn.functional as F
 import torchvision.models as tm
 from packaging import version

-from colossalai.testing import clear_cache_before_run, parameterize
 from tests.test_analyzer.test_fx.zoo import tm_models, tmm_models

 try:
@@ -13,40 +12,44 @@ except:
    pass


-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
-@pytest.mark.parametrize('m', tm_models + tmm_models)
+@pytest.mark.skipif(version.parse(torch.__version__) < version.parse("1.12.0"), reason="torch version < 12")
+@pytest.mark.parametrize("m", tm_models + tmm_models)
 def test_flop_count_module(m):
    x = torch.rand(2, 3, 224, 224)
-    with MetaTensorMode():    # save time for testing
+    with MetaTensorMode():  # save time for testing
        module = m()
    rs_fwd, rs_bwd = flop_count(module, x, verbose=True)
-    assert rs_fwd > 0, f'fwd flop count of {m.__name__} is {rs_fwd}'
-    assert rs_bwd > 0, f'bwd flop count of {m.__name__} is {rs_bwd}'
+    assert rs_fwd > 0, f"fwd flop count of {m.__name__} is {rs_fwd}"
+    assert rs_bwd > 0, f"bwd flop count of {m.__name__} is {rs_bwd}"


 odd_cases = [
-    (F.relu, (torch.rand(2, 3, 224, 224, requires_grad=True),), {
-        'inplace': True
-    }),
-    (F.max_pool2d, (torch.rand(2, 3, 224, 224, requires_grad=True),), {
-        'kernel_size': 3,
-        'stride': 2,
-        'padding': 1,
-        'dilation': 2
-    }),
-    (torch.where, (torch.rand(2, 3, 224, 224) > 0.5, torch.rand(2, 3, 224, 224, requires_grad=True),
-                   torch.rand(2, 3, 224, 224, requires_grad=True)), {}),
+    (F.relu, (torch.rand(2, 3, 224, 224, requires_grad=True),), {"inplace": True}),
+    (
+        F.max_pool2d,
+        (torch.rand(2, 3, 224, 224, requires_grad=True),),
+        {"kernel_size": 3, "stride": 2, "padding": 1, "dilation": 2},
+    ),
+    (
+        torch.where,
+        (
+            torch.rand(2, 3, 224, 224) > 0.5,
+            torch.rand(2, 3, 224, 224, requires_grad=True),
+            torch.rand(2, 3, 224, 224, requires_grad=True),
+        ),
+        {},
+    ),
 ]


-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
-@pytest.mark.parametrize('func, args, kwargs', odd_cases)
+@pytest.mark.skipif(version.parse(torch.__version__) < version.parse("1.12.0"), reason="torch version < 12")
+@pytest.mark.parametrize("func, args, kwargs", odd_cases)
 def test_flop_count_function(func, args, kwargs):
    rs_fwd, rs_bwd = flop_count(func, *args, **kwargs, verbose=True)
-    assert rs_fwd > 0, f'fwd flop count of {func.__name__} is {rs_fwd}'
-    assert rs_bwd > 0, f'bwd flop count of {func.__name__} is {rs_bwd}'
+    assert rs_fwd > 0, f"fwd flop count of {func.__name__} is {rs_fwd}"
+    assert rs_bwd > 0, f"bwd flop count of {func.__name__} is {rs_bwd}"


-if __name__ == '__main__':
+if __name__ == "__main__":
    test_flop_count_module(tm.resnet18)
-    test_flop_count_function(F.relu, (torch.rand(2, 3, 224, 224, requires_grad=True),), {'inplace': True})
+    test_flop_count_function(F.relu, (torch.rand(2, 3, 224, 224, requires_grad=True),), {"inplace": True})
--- a/tests/test_analyzer/test_subclasses/test_meta_mode.py
+++ b/tests/test_analyzer/test_subclasses/test_meta_mode.py
@@ -6,17 +6,22 @@ from packaging import version
 from colossalai.testing import clear_cache_before_run, parameterize

 try:
-    from colossalai._analyzer._subclasses import MetaTensor, MetaTensorMode
+    from colossalai._analyzer._subclasses import MetaTensorMode
 except:
    pass
 from tests.test_analyzer.test_fx.zoo import tm_models, tmm_models


 def compare_all(tensor: torch.Tensor, meta_tensor: torch.Tensor):
-    assert tensor.shape == meta_tensor.shape, f'the shape of tensor ({tensor.shape}) and meta tensor ({meta_tensor.shape}) does not match.'
-    assert tensor.dtype == meta_tensor.dtype, f'the dtype of tensor ({tensor.dtype}) and meta tensor ({meta_tensor.dtype}) does not match.'
-    assert tensor.stride() == meta_tensor.stride(
-    ), f'the stride of tensor ({tensor.stride()}) and meta tensor ({meta_tensor.stride()}) does not match.'
+    assert (
+        tensor.shape == meta_tensor.shape
+    ), f"the shape of tensor ({tensor.shape}) and meta tensor ({meta_tensor.shape}) does not match."
+    assert (
+        tensor.dtype == meta_tensor.dtype
+    ), f"the dtype of tensor ({tensor.dtype}) and meta tensor ({meta_tensor.dtype}) does not match."
+    assert (
+        tensor.stride() == meta_tensor.stride()
+    ), f"the stride of tensor ({tensor.stride()}) and meta tensor ({meta_tensor.stride()}) does not match."


 def run_and_compare(model):
@@ -31,12 +36,12 @@ def run_and_compare(model):
    compare_all(x.grad, meta_x.grad)


-@pytest.mark.skipif(version.parse(torch.__version__) < version.parse('1.12.0'), reason='torch version < 12')
+@pytest.mark.skipif(version.parse(torch.__version__) < version.parse("1.12.0"), reason="torch version < 12")
@clear_cache_before_run()
-@parameterize('m', tm_models + tmm_models)
+@parameterize("m", tm_models + tmm_models)
 def test_meta_mode_shape(m):
    run_and_compare(m())


-if __name__ == '__main__':
+if __name__ == "__main__":
    test_meta_mode_shape(tm.resnet18)