Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-04 18:40:28 +00:00)
[legacy] move communication and nn to legacy and refactor logger (#4671)
* [legacy] move communication to legacy (#4640)
* [legacy] refactor logger and clean up legacy codes (#4654)
* [legacy] make logger independent to gpc
* [legacy] make optim independent to registry
* [legacy] move test engine to legacy
* [legacy] move nn to legacy (#4656)
* [legacy] move nn to legacy
* [checkpointio] fix save hf config
* [test] remove useless rpc pp test
* [legacy] fix nn init
* [example] skip tutorial hybrid parallel example
* [devops] test doc check
* [devops] test doc check
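The hunks below apply one mechanical change over and over: imports of the moved packages are repointed from the old top-level modules to the new colossalai.legacy namespace, and the affected import blocks are re-sorted along the way. A minimal sketch of the mapping, with a hypothetical try/except shim for downstream test code that has to run on both sides of this refactor (the shim itself is not part of the commit; module and symbol names are taken from the hunks below):

# Import paths before this commit      ->  after this commit
#   colossalai.nn                      ->  colossalai.legacy.nn
#   colossalai.communication           ->  colossalai.legacy.communication
#   colossalai.nn.layer.parallel_2d    ->  colossalai.legacy.nn.layer.parallel_2d
#   colossalai.nn.parallel.layers      ->  colossalai.legacy.nn.parallel.layers

# Hypothetical compatibility shim (illustration only):
try:
    from colossalai.legacy.communication import all_gather, all_reduce, reduce_scatter
    from colossalai.legacy.nn import CheckpointModule
except ImportError:  # older ColossalAI without the legacy namespace
    from colossalai.communication import all_gather, all_reduce, reduce_scatter
    from colossalai.nn import CheckpointModule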
@@ -2,7 +2,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
 
 from .registry import non_distributed_component_funcs
 from .utils.dummy_data_generator import DummyDataGenerator

@@ -2,7 +2,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
 
 from .registry import non_distributed_component_funcs
 from .utils.dummy_data_generator import DummyDataGenerator

@@ -2,7 +2,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
 
 from .registry import non_distributed_component_funcs
 from .utils import DummyDataGenerator

@@ -3,7 +3,7 @@
 import torch
 import torch.nn as nn
 
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
 
 from .registry import non_distributed_component_funcs
 from .utils.dummy_data_generator import DummyDataGenerator

@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 
-from colossalai.nn import CheckpointModule
+from colossalai.legacy.nn import CheckpointModule
 from colossalai.utils.cuda import get_current_device
 
 from .registry import non_distributed_component_funcs
@@ -1,10 +1,10 @@
 import pytest
 import torch
 
-from colossalai.communication.p2p_v2 import _recv_object, _send_object
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p_v2 import _recv_object, _send_object
 from colossalai.logging import disable_existing_loggers
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 

@@ -2,10 +2,10 @@ import pytest
 import torch
 import torch.distributed as dist
 
-from colossalai.communication import all_gather, all_reduce, reduce_scatter
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.legacy.communication import all_gather, all_reduce, reduce_scatter
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 from colossalai.utils import get_current_device
 

@@ -1,7 +1,10 @@
 import pytest
 import torch
 
-from colossalai.communication.p2p import (
+from colossalai.context import ParallelMode
+from colossalai.core import global_context as gpc
+from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p import (
     recv_backward,
     recv_forward,
     send_backward,
@@ -9,9 +12,6 @@ from colossalai.communication.p2p import (
     send_forward,
     send_forward_recv_backward,
 )
-from colossalai.context import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.initialize import launch
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 
 CONFIG = dict(parallel=dict(pipeline=2))

@@ -1,10 +1,10 @@
 import pytest
 import torch
 
-from colossalai.communication.p2p_v2 import recv_backward, recv_forward, send_backward, send_forward
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.legacy.communication.p2p_v2 import recv_backward, recv_forward, send_backward, send_forward
 from colossalai.logging import disable_existing_loggers
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 
@@ -5,7 +5,7 @@ from torch.nn import Parameter
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
-from colossalai.nn import (
+from colossalai.legacy.nn import (
     Classifier1D,
     Embedding1D,
     Linear1D_Col,

@@ -1,15 +1,16 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 
 import torch
 
 DEPTH = 4
 BATCH_SIZE = 8
 SEQ_LENGTH = 8
 IMG_SIZE = 16
 HIDDEN_SIZE = 8
 NUM_CLASSES = 8
 VOCAB_SIZE = 16
 
+
 def check_equal(A, B):
     assert torch.allclose(A, B, rtol=1e-3, atol=1e-1) == True
@@ -1,12 +1,23 @@
 import torch
 
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn import (Classifier2D, CrossEntropyLoss2D, Embedding2D, LayerNorm2D, Linear2D, PatchEmbedding2D,
-                           VanillaClassifier, VanillaPatchEmbedding, VocabParallelClassifier2D,
-                           VocabParallelCrossEntropyLoss2D, VocabParallelEmbedding2D)
+from colossalai.legacy.nn import (
+    Classifier2D,
+    CrossEntropyLoss2D,
+    Embedding2D,
+    LayerNorm2D,
+    Linear2D,
+    PatchEmbedding2D,
+    VanillaClassifier,
+    VanillaPatchEmbedding,
+    VocabParallelClassifier2D,
+    VocabParallelCrossEntropyLoss2D,
+    VocabParallelEmbedding2D,
+)
 from colossalai.utils import get_current_device, print_rank_0
 
-from .common import (BATCH_SIZE, DEPTH, HIDDEN_SIZE, IMG_SIZE, NUM_CLASSES, SEQ_LENGTH, VOCAB_SIZE, check_equal)
+from .common import BATCH_SIZE, DEPTH, HIDDEN_SIZE, IMG_SIZE, NUM_CLASSES, SEQ_LENGTH, VOCAB_SIZE, check_equal
 
+
 def check_linear():
@@ -336,7 +347,7 @@ def check_classifier_no_given_weight():
     layer.weight.data.copy_(W)
     # W.requires_grad = True
 
-    B_shape = (OUTPUT_SIZE, )
+    B_shape = (OUTPUT_SIZE,)
     B_master = torch.randint(5, B_shape, dtype=dtype, device=device)
     torch.distributed.broadcast(B_master, src=0)
     # B = torch.chunk(B_master, DEPTH, dim=0)[j]
@@ -572,7 +583,7 @@ def check_loss():
 
     out_shape = (BATCH_SIZE, NUM_CLASSES)
     out_master = torch.randn(out_shape, dtype=dtype, device=device)
-    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
     torch.distributed.broadcast(out_master, src=0)
     torch.distributed.broadcast(target_master, src=0)
     out = torch.chunk(out_master, DEPTH, dim=0)[i]
@@ -607,7 +618,7 @@ def check_vocab_parallel_loss():
 
     out_shape = (BATCH_SIZE, NUM_CLASSES)
     out_master = torch.randn(out_shape, dtype=dtype, device=device)
-    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
     torch.distributed.broadcast(out_master, src=0)
     torch.distributed.broadcast(target_master, src=0)
     out = torch.chunk(out_master, DEPTH, dim=0)[i]
@@ -5,10 +5,10 @@ import torch
 
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn.layer.parallel_2d._operation import Matmul_AB_2D, Matmul_ABT_2D, Matmul_ATB_2D
-from colossalai.utils import get_current_device
-from colossalai.utils import print_rank_0
-from .common import check_equal, BATCH_SIZE, SEQ_LENGTH, HIDDEN_SIZE, DEPTH
+from colossalai.legacy.nn.layer.parallel_2d._operation import Matmul_AB_2D, Matmul_ABT_2D, Matmul_ATB_2D
+from colossalai.utils import get_current_device, print_rank_0
+
+from .common import BATCH_SIZE, DEPTH, HIDDEN_SIZE, SEQ_LENGTH, check_equal
 
 
 def check_AB():
@@ -1,11 +1,22 @@
 import torch
+from torch.nn import Parameter
+
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn import (Classifier2p5D, CrossEntropyLoss2p5D, Embedding2p5D, LayerNorm2p5D, Linear2p5D,
-                           PatchEmbedding2p5D, VanillaClassifier, VanillaPatchEmbedding, VocabParallelClassifier2p5D,
-                           VocabParallelCrossEntropyLoss2p5D, VocabParallelEmbedding2p5D)
+from colossalai.legacy.nn import (
+    Classifier2p5D,
+    CrossEntropyLoss2p5D,
+    Embedding2p5D,
+    LayerNorm2p5D,
+    Linear2p5D,
+    PatchEmbedding2p5D,
+    VanillaClassifier,
+    VanillaPatchEmbedding,
+    VocabParallelClassifier2p5D,
+    VocabParallelCrossEntropyLoss2p5D,
+    VocabParallelEmbedding2p5D,
+)
 from colossalai.utils import get_current_device, print_rank_0
-from torch.nn import Parameter
 
 from .common import *
 
@@ -342,7 +353,7 @@ def check_classifier_no_given_weight():
     layer.weight.data.copy_(W)
     # W.requires_grad = True
 
-    B_shape = (OUTPUT_SIZE, )
+    B_shape = (OUTPUT_SIZE,)
     B_master = torch.randint(5, B_shape, dtype=dtype, device=device)
     torch.distributed.broadcast(B_master, src=0)
     # B = torch.chunk(B_master, TESSERACT_DIM, dim=0)[j]
@@ -577,7 +588,7 @@ def check_loss():
 
     out_shape = (BATCH_SIZE, NUM_CLASSES)
     out_master = torch.randn(out_shape, dtype=dtype, device=device)
-    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
     torch.distributed.broadcast(out_master, src=0)
     torch.distributed.broadcast(target_master, src=0)
     out = torch.chunk(out_master, TESSERACT_DIM, dim=0)[i]
@@ -612,7 +623,7 @@ def check_vocab_parallel_loss():
 
     out_shape = (BATCH_SIZE, NUM_CLASSES)
     out_master = torch.randn(out_shape, dtype=dtype, device=device)
-    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
     torch.distributed.broadcast(out_master, src=0)
     torch.distributed.broadcast(target_master, src=0)
     out = torch.chunk(out_master, TESSERACT_DIM, dim=0)[i]
@@ -2,10 +2,9 @@ import torch
 
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn.layer.parallel_2p5d._operation import Matmul_AB_2p5D, Matmul_ABT_2p5D, \
-    Matmul_ATB_2p5D
-from colossalai.utils import get_current_device
-from colossalai.utils import print_rank_0
+from colossalai.legacy.nn.layer.parallel_2p5d._operation import Matmul_AB_2p5D, Matmul_ABT_2p5D, Matmul_ATB_2p5D
+from colossalai.utils import get_current_device, print_rank_0
 
 from .common import *
 
@@ -11,4 +11,4 @@ IMG_SIZE = 16
 
 
 def check_equal(A, B):
-    assert torch.allclose(A, B, rtol=1e-5, atol=1e-2)
+    assert torch.allclose(A, B, rtol=1e-5, atol=1e-2)
@@ -7,8 +7,7 @@ import torch
 
 from colossalai.constants import INPUT_GROUP_3D, OUTPUT_GROUP_3D, WEIGHT_GROUP_3D
 from colossalai.core import global_context
-from colossalai.logging import get_dist_logger
-from colossalai.nn import (
+from colossalai.legacy.nn import (
     Classifier3D,
     CrossEntropyLoss3D,
     Embedding3D,
@@ -21,7 +20,8 @@ from colossalai.nn import (
     VocabParallelCrossEntropyLoss3D,
     VocabParallelEmbedding3D,
 )
-from colossalai.nn.layer.parallel_3d._utils import get_parallel_mode_from_env
+from colossalai.legacy.nn.layer.parallel_3d._utils import get_parallel_mode_from_env
+from colossalai.logging import get_dist_logger
 from colossalai.utils import get_current_device, print_rank_0
 
 from .common import BATCH_SIZE, DEPTH, HIDDEN_SIZE, IMG_SIZE, NUM_CLASSES, SEQ_LENGTH, VOCAB_SIZE, check_equal
@@ -16,4 +16,4 @@ VOCAB_SIZE = 16
 def check_equal(A, B):
     eq = torch.allclose(A, B, rtol=1e-3, atol=1e-2)
     assert eq, f"\nA = {A}\nB = {B}"
-    return eq
+    return eq
@@ -6,7 +6,7 @@ import pytest
 import torch
 
 import colossalai
-from colossalai.nn.parallel.layers import (
+from colossalai.legacy.nn.parallel.layers import (
     CachedEmbeddingBag,
     CachedParamMgr,
     EvictionStrategy,
@@ -2,7 +2,7 @@ import torch
 
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn import TransformerSelfAttentionRing
+from colossalai.legacy.nn import TransformerSelfAttentionRing
 from colossalai.utils import get_current_device
 
 
@@ -5,6 +5,7 @@ import torch.distributed as dist
 import colossalai
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.nn.layer.parallel_sequence import RingAV, RingQK
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 
 CONFIG = dict(parallel=dict(tensor=dict(size=4, mode='sequence')))
@@ -42,7 +43,7 @@ def check_ring_qk(rank, world_size):
     a = torch.matmul(q, k.transpose(2, 1))
 
     # compute distributed attention scores
-    ring_qk = colossalai.nn.layer.parallel_sequence.RingQK.apply
+    ring_qk = RingQK.apply
     sub_a = ring_qk(sub_q, sub_k, batch_size, num_heads, sub_seq_length)
 
     # check master and distributed attention scores
@@ -95,7 +96,7 @@ def check_ring_av(rank, world_size):
     out = torch.matmul(a, v)
 
     # compute distributed attention scores
-    ring_av = colossalai.nn.layer.parallel_sequence.RingAV.apply
+    ring_av = RingAV.apply
    sub_out = ring_av(sub_a, sub_v, batch_size, num_heads, attention_head_size, sub_seq_length)
 
     # print(f'master output shape: {out.shape}, partial output shape: {sub_out.shape}')
@@ -5,7 +5,10 @@ import pytest
 import torch
 import torch.distributed as dist
 
-from colossalai.communication import (
+from colossalai.context.parallel_mode import ParallelMode
+from colossalai.core import global_context as gpc
+from colossalai.initialize import launch
+from colossalai.legacy.communication import (
     recv_backward,
     recv_forward,
     recv_obj_meta,
@@ -15,9 +18,6 @@ from colossalai.communication import (
     send_forward_recv_backward,
     send_obj_meta,
 )
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.initialize import launch
 from colossalai.logging import get_dist_logger
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 from colossalai.utils import get_current_device
@@ -1,81 +0,0 @@
-import os
-import time
-
-import pytest
-import torch
-import torch.nn as nn
-from rpc_test_utils import parse_args, rpc_run
-from titans.dataloader.cifar10 import build_cifar
-from torchvision.models import resnet50
-from tqdm import tqdm
-
-from colossalai.pipeline.pipelinable import PipelinableContext
-from colossalai.pipeline.rpc import OneFOneBPipelineEngine
-
-
-def flatten(x):
-    return torch.flatten(x, 1)
-
-
-def partition(pp_rank: int, chunk: int, stage_num: int):
-    pipelinable = PipelinableContext()
-
-    # build model partitions
-    with pipelinable:
-        # input : [B, 3, 32, 32]
-        _ = resnet50()
-
-    pipelinable.policy = "customized"
-
-    exec_seq = [
-        'conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3', 'layer4', 'avgpool', (flatten, "behind"), 'fc'
-    ]
-    pipelinable.to_layer_list(exec_seq)
-    partition = pipelinable.partition(chunk, stage_num, pp_rank)
-    return partition
-
-
-def run_master(args):
-    batch_size = args.batch_size
-    chunk = args.chunk
-    device = args.device
-    world_size = args.world_size
-    stage_num = world_size
-    num_microbatches = args.num_microbatches
-
-    # build dataloader
-    root = os.environ.get('DATA', './data')
-    train_dataloader, test_dataloader = build_cifar(batch_size, root, padding=4, crop=32, resize=32)
-    criterion = nn.CrossEntropyLoss()
-
-    pp_engine = OneFOneBPipelineEngine(partition_fn=partition,
-                                       stage_num=stage_num,
-                                       num_microbatches=num_microbatches,
-                                       device=device,
-                                       chunk=chunk,
-                                       criterion=criterion,
-                                       checkpoint=False)
-
-    pp_engine.initialize_optimizer(torch.optim.Adam, lr=1e-3)
-    s = time.time()
-
-    for bx, by in tqdm(train_dataloader):
-        pp_engine.forward_backward(bx, labels=by, forward_only=False)
-
-    cost_time = time.time() - s
-
-    print("total cost time :", cost_time)
-    print("cost time per batch:", cost_time / len(train_dataloader))
-
-
-@pytest.mark.skip("Test for performance, no need for CI")
-def main():
-    args = parse_args()
-    # this is due to limitation of partition function
-    args.world_size = 2
-    args.chunk = 1
-    rpc_run(args, run_master)
-
-
-if __name__ == '__main__':
-    main()
@@ -7,7 +7,7 @@ import pytest
 import torch
 import torch.nn as nn
 
-import colossalai.nn as col_nn
+import colossalai.legacy.nn as col_nn
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch

@@ -7,7 +7,7 @@ import pytest
 import torch
 import torch.nn as nn
 
-import colossalai.nn as col_nn
+import colossalai.legacy.nn as col_nn
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch

@@ -7,7 +7,7 @@ import pytest
 import torch
 import torch.nn as nn
 
-import colossalai.nn as col_nn
+import colossalai.legacy.nn as col_nn
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch

@@ -7,7 +7,7 @@ import pytest
 import torch
 import torch.nn as nn
 
-import colossalai.nn as col_nn
+import colossalai.legacy.nn as col_nn
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch