[legacy] move communication and nn to legacy and refactor logger (#4671)

* [legacy] move communication to legacy (#4640)

* [legacy] refactor logger and clean up legacy codes (#4654)

* [legacy] make logger independent of gpc

* [legacy] make optim independent of registry

* [legacy] move test engine to legacy

* [legacy] move nn to legacy (#4656)

* [legacy] move nn to legacy

* [checkpointio] fix save hf config

* [test] remove useless rpc pp test

* [legacy] fix nn init

* [example] skip tutorial hybrid parallel example

* [devops] test doc check

* [devops] test doc check
Author: Hongxin Liu
Date: 2023-09-11 16:24:28 +08:00 (committed by GitHub)
Parent: 536397cc95
Commit: 554aa9592e
170 changed files with 781 additions and 758 deletions
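For downstream code, the visible effect of this refactor is an import-path change: the tensor-parallel layers and the communication helpers touched in the diffs below now live under the colossalai.legacy namespace. A minimal before/after sketch (the module paths are taken from the diffs in this commit; which symbols you import is illustrative):

    # before this commit
    # from colossalai.nn import CheckpointModule, Linear1D_Col
    # from colossalai.communication import all_gather, all_reduce, reduce_scatter

    # after this commit: the same symbols, re-homed under the legacy package
    from colossalai.legacy.nn import CheckpointModule, Linear1D_Col
    from colossalai.legacy.communication import all_gather, all_reduce, reduce_scatter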

View File

@@ -2,7 +2,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
from .registry import non_distributed_component_funcs
from .utils.dummy_data_generator import DummyDataGenerator

View File

@@ -2,7 +2,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
from .registry import non_distributed_component_funcs
from .utils.dummy_data_generator import DummyDataGenerator

View File

@@ -2,7 +2,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
from .registry import non_distributed_component_funcs
from .utils import DummyDataGenerator

View File

@@ -3,7 +3,7 @@
import torch
import torch.nn as nn
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
from .registry import non_distributed_component_funcs
from .utils.dummy_data_generator import DummyDataGenerator

View File

@@ -1,7 +1,7 @@
import torch
import torch.nn as nn
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
from colossalai.utils.cuda import get_current_device
from .registry import non_distributed_component_funcs

View File

@@ -1,10 +1,10 @@
import pytest
import torch
-from colossalai.communication.p2p_v2 import _recv_object, _send_object
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p_v2 import _recv_object, _send_object
from colossalai.logging import disable_existing_loggers
from colossalai.testing import rerun_if_address_is_in_use, spawn

View File

@@ -2,10 +2,10 @@ import pytest
import torch
import torch.distributed as dist
-from colossalai.communication import all_gather, all_reduce, reduce_scatter
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.initialize import launch
+from colossalai.legacy.communication import all_gather, all_reduce, reduce_scatter
from colossalai.testing import rerun_if_address_is_in_use, spawn
from colossalai.utils import get_current_device

View File

@@ -1,7 +1,10 @@
import pytest
import torch
-from colossalai.communication.p2p import (
+from colossalai.context import ParallelMode
+from colossalai.core import global_context as gpc
+from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p import (
recv_backward,
recv_forward,
send_backward,
@@ -9,9 +12,6 @@ from colossalai.communication.p2p import (
send_forward,
send_forward_recv_backward,
)
-from colossalai.context import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.initialize import launch
from colossalai.testing import rerun_if_address_is_in_use, spawn
CONFIG = dict(parallel=dict(pipeline=2))

View File

@@ -1,10 +1,10 @@
import pytest
import torch
-from colossalai.communication.p2p_v2 import recv_backward, recv_forward, send_backward, send_forward
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p_v2 import recv_backward, recv_forward, send_backward, send_forward
from colossalai.logging import disable_existing_loggers
from colossalai.testing import rerun_if_address_is_in_use, spawn

View File

@@ -5,7 +5,7 @@ from torch.nn import Parameter
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.global_variables import tensor_parallel_env as env
-from colossalai.nn import (
+from colossalai.legacy.nn import (
Classifier1D,
Embedding1D,
Linear1D_Col,

View File

@@ -1,15 +1,16 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import torch
DEPTH = 4
BATCH_SIZE = 8
SEQ_LENGTH = 8
IMG_SIZE = 16
HIDDEN_SIZE = 8
NUM_CLASSES = 8
VOCAB_SIZE = 16
def check_equal(A, B):
assert torch.allclose(A, B, rtol=1e-3, atol=1e-1) == True

View File

@@ -1,12 +1,23 @@
import torch
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
-from colossalai.nn import (Classifier2D, CrossEntropyLoss2D, Embedding2D, LayerNorm2D, Linear2D, PatchEmbedding2D,
-                           VanillaClassifier, VanillaPatchEmbedding, VocabParallelClassifier2D,
-                           VocabParallelCrossEntropyLoss2D, VocabParallelEmbedding2D)
+from colossalai.legacy.nn import (
+    Classifier2D,
+    CrossEntropyLoss2D,
+    Embedding2D,
+    LayerNorm2D,
+    Linear2D,
+    PatchEmbedding2D,
+    VanillaClassifier,
+    VanillaPatchEmbedding,
+    VocabParallelClassifier2D,
+    VocabParallelCrossEntropyLoss2D,
+    VocabParallelEmbedding2D,
+)
from colossalai.utils import get_current_device, print_rank_0
-from .common import (BATCH_SIZE, DEPTH, HIDDEN_SIZE, IMG_SIZE, NUM_CLASSES, SEQ_LENGTH, VOCAB_SIZE, check_equal)
+from .common import BATCH_SIZE, DEPTH, HIDDEN_SIZE, IMG_SIZE, NUM_CLASSES, SEQ_LENGTH, VOCAB_SIZE, check_equal
def check_linear():
@@ -336,7 +347,7 @@ def check_classifier_no_given_weight():
layer.weight.data.copy_(W)
# W.requires_grad = True
-B_shape = (OUTPUT_SIZE, )
+B_shape = (OUTPUT_SIZE,)
B_master = torch.randint(5, B_shape, dtype=dtype, device=device)
torch.distributed.broadcast(B_master, src=0)
# B = torch.chunk(B_master, DEPTH, dim=0)[j]
@@ -572,7 +583,7 @@ def check_loss():
out_shape = (BATCH_SIZE, NUM_CLASSES)
out_master = torch.randn(out_shape, dtype=dtype, device=device)
-target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
torch.distributed.broadcast(out_master, src=0)
torch.distributed.broadcast(target_master, src=0)
out = torch.chunk(out_master, DEPTH, dim=0)[i]
@@ -607,7 +618,7 @@ def check_vocab_parallel_loss():
out_shape = (BATCH_SIZE, NUM_CLASSES)
out_master = torch.randn(out_shape, dtype=dtype, device=device)
-target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
torch.distributed.broadcast(out_master, src=0)
torch.distributed.broadcast(target_master, src=0)
out = torch.chunk(out_master, DEPTH, dim=0)[i]

View File

@@ -5,10 +5,10 @@ import torch
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
-from colossalai.nn.layer.parallel_2d._operation import Matmul_AB_2D, Matmul_ABT_2D, Matmul_ATB_2D
-from colossalai.utils import get_current_device
-from colossalai.utils import print_rank_0
-from .common import check_equal, BATCH_SIZE, SEQ_LENGTH, HIDDEN_SIZE, DEPTH
+from colossalai.legacy.nn.layer.parallel_2d._operation import Matmul_AB_2D, Matmul_ABT_2D, Matmul_ATB_2D
+from colossalai.utils import get_current_device, print_rank_0
+from .common import BATCH_SIZE, DEPTH, HIDDEN_SIZE, SEQ_LENGTH, check_equal
def check_AB():

View File

@@ -1,11 +1,22 @@
import torch
+from torch.nn import Parameter
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
-from colossalai.nn import (Classifier2p5D, CrossEntropyLoss2p5D, Embedding2p5D, LayerNorm2p5D, Linear2p5D,
-                           PatchEmbedding2p5D, VanillaClassifier, VanillaPatchEmbedding, VocabParallelClassifier2p5D,
-                           VocabParallelCrossEntropyLoss2p5D, VocabParallelEmbedding2p5D)
+from colossalai.legacy.nn import (
+    Classifier2p5D,
+    CrossEntropyLoss2p5D,
+    Embedding2p5D,
+    LayerNorm2p5D,
+    Linear2p5D,
+    PatchEmbedding2p5D,
+    VanillaClassifier,
+    VanillaPatchEmbedding,
+    VocabParallelClassifier2p5D,
+    VocabParallelCrossEntropyLoss2p5D,
+    VocabParallelEmbedding2p5D,
+)
from colossalai.utils import get_current_device, print_rank_0
-from torch.nn import Parameter
from .common import *
@@ -342,7 +353,7 @@ def check_classifier_no_given_weight():
layer.weight.data.copy_(W)
# W.requires_grad = True
-B_shape = (OUTPUT_SIZE, )
+B_shape = (OUTPUT_SIZE,)
B_master = torch.randint(5, B_shape, dtype=dtype, device=device)
torch.distributed.broadcast(B_master, src=0)
# B = torch.chunk(B_master, TESSERACT_DIM, dim=0)[j]
@@ -577,7 +588,7 @@ def check_loss():
out_shape = (BATCH_SIZE, NUM_CLASSES)
out_master = torch.randn(out_shape, dtype=dtype, device=device)
-target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
torch.distributed.broadcast(out_master, src=0)
torch.distributed.broadcast(target_master, src=0)
out = torch.chunk(out_master, TESSERACT_DIM, dim=0)[i]
@@ -612,7 +623,7 @@ def check_vocab_parallel_loss():
out_shape = (BATCH_SIZE, NUM_CLASSES)
out_master = torch.randn(out_shape, dtype=dtype, device=device)
-target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
torch.distributed.broadcast(out_master, src=0)
torch.distributed.broadcast(target_master, src=0)
out = torch.chunk(out_master, TESSERACT_DIM, dim=0)[i]

View File

@@ -2,10 +2,9 @@ import torch
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
-from colossalai.nn.layer.parallel_2p5d._operation import Matmul_AB_2p5D, Matmul_ABT_2p5D, \
-    Matmul_ATB_2p5D
-from colossalai.utils import get_current_device
-from colossalai.utils import print_rank_0
+from colossalai.legacy.nn.layer.parallel_2p5d._operation import Matmul_AB_2p5D, Matmul_ABT_2p5D, Matmul_ATB_2p5D
+from colossalai.utils import get_current_device, print_rank_0
from .common import *

View File

@@ -11,4 +11,4 @@ IMG_SIZE = 16
def check_equal(A, B):
-    assert torch.allclose(A, B, rtol=1e-5, atol=1e-2)
+    assert torch.allclose(A, B, rtol=1e-5, atol=1e-2)

View File

@@ -7,8 +7,7 @@ import torch
from colossalai.constants import INPUT_GROUP_3D, OUTPUT_GROUP_3D, WEIGHT_GROUP_3D
from colossalai.core import global_context
-from colossalai.logging import get_dist_logger
-from colossalai.nn import (
+from colossalai.legacy.nn import (
Classifier3D,
CrossEntropyLoss3D,
Embedding3D,
@@ -21,7 +20,8 @@ from colossalai.nn import (
VocabParallelCrossEntropyLoss3D,
VocabParallelEmbedding3D,
)
-from colossalai.nn.layer.parallel_3d._utils import get_parallel_mode_from_env
+from colossalai.legacy.nn.layer.parallel_3d._utils import get_parallel_mode_from_env
+from colossalai.logging import get_dist_logger
from colossalai.utils import get_current_device, print_rank_0
from .common import BATCH_SIZE, DEPTH, HIDDEN_SIZE, IMG_SIZE, NUM_CLASSES, SEQ_LENGTH, VOCAB_SIZE, check_equal

View File

@@ -16,4 +16,4 @@ VOCAB_SIZE = 16
def check_equal(A, B):
eq = torch.allclose(A, B, rtol=1e-3, atol=1e-2)
assert eq, f"\nA = {A}\nB = {B}"
-    return eq
+    return eq

View File

@@ -6,7 +6,7 @@ import pytest
import torch
import colossalai
-from colossalai.nn.parallel.layers import (
+from colossalai.legacy.nn.parallel.layers import (
CachedEmbeddingBag,
CachedParamMgr,
EvictionStrategy,

View File

@@ -2,7 +2,7 @@ import torch
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
-from colossalai.nn import TransformerSelfAttentionRing
+from colossalai.legacy.nn import TransformerSelfAttentionRing
from colossalai.utils import get_current_device

View File

@@ -5,6 +5,7 @@ import torch.distributed as dist
import colossalai
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
+from colossalai.legacy.nn.layer.parallel_sequence import RingAV, RingQK
from colossalai.testing import rerun_if_address_is_in_use, spawn
CONFIG = dict(parallel=dict(tensor=dict(size=4, mode='sequence')))
@@ -42,7 +43,7 @@ def check_ring_qk(rank, world_size):
a = torch.matmul(q, k.transpose(2, 1))
# compute distributed attention scores
-ring_qk = colossalai.nn.layer.parallel_sequence.RingQK.apply
+ring_qk = RingQK.apply
sub_a = ring_qk(sub_q, sub_k, batch_size, num_heads, sub_seq_length)
# check master and distributed attention scores
@@ -95,7 +96,7 @@ def check_ring_av(rank, world_size):
out = torch.matmul(a, v)
# compute distributed attention scores
-ring_av = colossalai.nn.layer.parallel_sequence.RingAV.apply
+ring_av = RingAV.apply
sub_out = ring_av(sub_a, sub_v, batch_size, num_heads, attention_head_size, sub_seq_length)
# print(f'master output shape: {out.shape}, partial output shape: {sub_out.shape}')

View File

@@ -5,7 +5,10 @@ import pytest
import torch
import torch.distributed as dist
-from colossalai.communication import (
+from colossalai.context.parallel_mode import ParallelMode
+from colossalai.core import global_context as gpc
+from colossalai.initialize import launch
+from colossalai.legacy.communication import (
recv_backward,
recv_forward,
recv_obj_meta,
@@ -15,9 +18,6 @@ from colossalai.communication import (
send_forward_recv_backward,
send_obj_meta,
)
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.initialize import launch
from colossalai.logging import get_dist_logger
from colossalai.testing import rerun_if_address_is_in_use, spawn
from colossalai.utils import get_current_device

View File

@@ -1,81 +0,0 @@
import os
import time

import pytest
import torch
import torch.nn as nn
from rpc_test_utils import parse_args, rpc_run
from titans.dataloader.cifar10 import build_cifar
from torchvision.models import resnet50
from tqdm import tqdm

from colossalai.pipeline.pipelinable import PipelinableContext
from colossalai.pipeline.rpc import OneFOneBPipelineEngine


def flatten(x):
    return torch.flatten(x, 1)


def partition(pp_rank: int, chunk: int, stage_num: int):
    pipelinable = PipelinableContext()

    # build model partitions
    with pipelinable:
        # input : [B, 3, 32, 32]
        _ = resnet50()

    pipelinable.policy = "customized"
    exec_seq = [
        'conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3', 'layer4', 'avgpool', (flatten, "behind"), 'fc'
    ]
    pipelinable.to_layer_list(exec_seq)
    partition = pipelinable.partition(chunk, stage_num, pp_rank)
    return partition


def run_master(args):
    batch_size = args.batch_size
    chunk = args.chunk
    device = args.device
    world_size = args.world_size
    stage_num = world_size
    num_microbatches = args.num_microbatches

    # build dataloader
    root = os.environ.get('DATA', './data')
    train_dataloader, test_dataloader = build_cifar(batch_size, root, padding=4, crop=32, resize=32)
    criterion = nn.CrossEntropyLoss()

    pp_engine = OneFOneBPipelineEngine(partition_fn=partition,
                                       stage_num=stage_num,
                                       num_microbatches=num_microbatches,
                                       device=device,
                                       chunk=chunk,
                                       criterion=criterion,
                                       checkpoint=False)

    pp_engine.initialize_optimizer(torch.optim.Adam, lr=1e-3)
    s = time.time()
    for bx, by in tqdm(train_dataloader):
        pp_engine.forward_backward(bx, labels=by, forward_only=False)
    cost_time = time.time() - s

    print("total cost time :", cost_time)
    print("cost time per batch:", cost_time / len(train_dataloader))


@pytest.mark.skip("Test for performance, no need for CI")
def main():
    args = parse_args()
    # this is due to limitation of partition function
    args.world_size = 2
    args.chunk = 1
    rpc_run(args, run_master)


if __name__ == '__main__':
    main()

View File

@@ -7,7 +7,7 @@ import pytest
import torch
import torch.nn as nn
-import colossalai.nn as col_nn
+import colossalai.legacy.nn as col_nn
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.initialize import launch

View File

@@ -7,7 +7,7 @@ import pytest
import torch
import torch.nn as nn
-import colossalai.nn as col_nn
+import colossalai.legacy.nn as col_nn
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.initialize import launch

View File

@@ -7,7 +7,7 @@ import pytest
import torch
import torch.nn as nn
-import colossalai.nn as col_nn
+import colossalai.legacy.nn as col_nn
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.initialize import launch

View File

@@ -7,7 +7,7 @@ import pytest
import torch
import torch.nn as nn
-import colossalai.nn as col_nn
+import colossalai.legacy.nn as col_nn
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.initialize import launch