Mirror of https://github.com/hpcaitech/ColossalAI.git
added CI for unit testing (#69)
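This commit replaces the SLURM-driven test launchers with self-contained pytest suites that spawn their own worker processes via torch.multiprocessing. The sketch below distills the pattern that every converted test in this diff follows; it is an illustration assembled from the hunks, not a file from the repository, and the config and check bodies are placeholders.

    import pytest
    import torch
    import torch.multiprocessing as mp
    from functools import partial

    from colossalai.core import global_context as gpc
    from colossalai.initialize import launch

    # Illustrative config; each real suite defines its own parallel settings.
    CONFIG = dict(parallel=dict(pipeline=1, tensor=dict(mode='1d', size=2)))


    def check_layer(rank, world_size):
        # Each spawned worker initializes the distributed environment itself,
        # so no external launcher (SLURM or torch.distributed.launch) is needed.
        launch(config=CONFIG,
               rank=rank,
               world_size=world_size,
               host='localhost',
               port=29999,        # any free port; the real suites use 29920-29924
               backend='nccl')

        # ... run the layer/operation checks here ...

        gpc.destroy()
        torch.cuda.empty_cache()


    @pytest.mark.dist
    def test_layers():
        world_size = 2
        run_func = partial(check_layer, world_size=world_size)
        # mp.spawn supplies the process index as the first positional argument (rank).
        mp.spawn(run_func, nprocs=world_size)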
@@ -1,4 +0,0 @@
-#!/usr/bin/env sh
-test_file=$1
-
-python $test_file --rank $SLURM_PROCID --world_size $SLURM_NPROCS --host $HOST --port 29500
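The deleted launcher only worked inside a SLURM allocation: it forwarded the per-task rank and the total task count that SLURM exports, plus a $HOST master address, to the test script. Roughly, in Python terms (an illustrative equivalent, not code from the repository):

    import os

    # SLURM_PROCID is this task's rank within the job step; SLURM_NPROCS is the
    # total number of launched tasks. The removed script passed these to the
    # test as --rank/--world_size along with $HOST and a fixed port.
    rank = int(os.environ.get('SLURM_PROCID', '0'))
    world_size = int(os.environ.get('SLURM_NPROCS', '1'))
    host = os.environ.get('HOST', 'localhost')
    print(f'rank={rank} world_size={world_size} host={host} port=29500')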
tests/test_layers/test_1d/checks_1d/__init__.py  (new, empty file)
@@ -1,4 +1,3 @@
-from tests.test_layers.test_3d.common import IMG_SIZE
 import torch
 import torch.distributed as dist
 from torch.nn import Parameter
@@ -7,7 +6,7 @@ from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.nn import Linear1D_Col, Linear1D_Row, TransformerMLP1D, TransformerSelfAttention1D, ViTMLP1D, ViTSelfAttention1D, ViTPatchEmbedding1D, ViTHead1D, ViTTokenFuser1D
 from colossalai.utils import get_current_device, print_rank_0
-from common import HIDDEN_SIZE, DEPTH, BATCH_SIZE, SEQ_LENGTH, NUM_CLASSES, check_equal, IMG_SIZE
+from .common import HIDDEN_SIZE, DEPTH, BATCH_SIZE, SEQ_LENGTH, NUM_CLASSES, check_equal, IMG_SIZE


 def check_linear_col():
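The absolute `from common import ...` becomes the relative `from .common import ...` because the checks now live in a proper package: the empty `__init__.py` added above makes `checks_1d` importable, so `test_1d.py` can pull the checks in as `checks_1d.check_layer_1d`. The layout implied by these imports is roughly (a sketch inferred from the diff, not an exhaustive listing):

    tests/test_layers/test_1d/
        checks_1d/
            __init__.py         (new, empty)
            check_layer_1d.py   (uses `from .common import ...`)
            common.py
        test_1d.py              (imports `from checks_1d.check_layer_1d import *`)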
@@ -2,10 +2,13 @@
 # -*- encoding: utf-8 -*-

 import pytest
 import torch
+import torch.multiprocessing as mp

 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch, get_default_parser
-from test_layer import *
+from functools import partial
+from checks_1d.check_layer_1d import *

 CONFIG = dict(
     parallel=dict(
@@ -18,8 +21,14 @@ CONFIG = dict(
 )


-def check_layer():
-    # print_rank_0('start check_linear_col')
+def check_layer(rank, world_size):
+    launch(config=CONFIG,
+           rank=rank,
+           world_size=world_size,
+           host='localhost',
+           port=29920,
+           backend='nccl')
+
     check_linear_col()
     check_linear_row()
     check_attention()
@@ -28,21 +37,15 @@ def check_layer():
     check_embed()
     check_head()

     gpc.destroy()
     torch.cuda.empty_cache()


 @pytest.mark.dist
-@pytest.mark.skip("This test should be invoked by test.sh in the same folder as it runs on multiple gpus")
 def test_1d():
-    parser = get_default_parser()
-    args = parser.parse_args()
-    launch(config=CONFIG,
-           rank=args.rank,
-           world_size=args.world_size,
-           host=args.host,
-           port=args.port,
-           backend=args.backend)
-
-    check_layer()
-    gpc.destroy()
+    world_size = 2
+    run_func = partial(check_layer, world_size=world_size)
+    mp.spawn(run_func, nprocs=world_size)


 if __name__ == '__main__':
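With the argument-parser entry point and the skip marker gone, the 1D suite can be launched straight from pytest; a hypothetical local invocation (assuming the custom `dist` marker is registered in the pytest config and at least two GPUs are available):

    import pytest

    if __name__ == '__main__':
        # Select only the distributed tests under the 1D folder; mp.spawn inside
        # test_1d() creates the two worker processes itself.
        raise SystemExit(pytest.main(['-m', 'dist', 'tests/test_layers/test_1d']))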
tests/test_layers/test_2d/checks_2d/__init__.py  (new, empty file)
@@ -5,7 +5,7 @@ from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.nn import Linear2D, LayerNorm2D, TransformerSelfAttention2D, TransformerMLP2D, TransformerLayer2D
 from colossalai.utils import get_current_device, print_rank_0
-from common import HIDDEN_SIZE, DEPTH, BATCH_SIZE, SEQ_LENGTH, check_equal
+from .common import HIDDEN_SIZE, DEPTH, BATCH_SIZE, SEQ_LENGTH, check_equal


 def check_linear():
@@ -8,7 +8,7 @@ from colossalai.core import global_context as gpc
 from colossalai.nn.layer.parallel_2d import Matmul_AB_2D, Matmul_ABT_2D, Matmul_ATB_2D
 from colossalai.utils import get_current_device
 from colossalai.utils import print_rank_0
-from common import check_equal, BATCH_SIZE, SEQ_LENGTH, HIDDEN_SIZE, DEPTH
+from .common import check_equal, BATCH_SIZE, SEQ_LENGTH, HIDDEN_SIZE, DEPTH


 def check_AB():
@@ -2,11 +2,15 @@
 # -*- encoding: utf-8 -*-

 import pytest
 import torch
+import torch.multiprocessing as mp

 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch, get_default_parser
-from test_layer import check_linear, check_layernorm, check_attention, check_mlp, check_transformerlayer
-from test_operation import check_AB, check_ABT, check_ATB
+from checks_2d.check_layer_2d import check_linear, check_layernorm, check_attention, check_mlp, check_transformerlayer
+from checks_2d.check_operation_2d import check_AB, check_ABT, check_ATB
+from functools import partial


 CONFIG = dict(
     parallel=dict(
@@ -33,20 +37,25 @@ def check_layer():
     check_transformerlayer()


-@pytest.mark.dist
-@pytest.mark.skip("This test should be invoked by test.sh in the same folder as it runs on multiple gpus")
-def test_2d():
-    parser = get_default_parser()
-    args = parser.parse_args()
+def check_layer_and_operation(rank, world_size):
     launch(config=CONFIG,
-           rank=args.rank,
-           world_size=args.world_size,
-           host=args.host,
-           port=args.port,
-           backend=args.backend)
+           rank=rank,
+           world_size=world_size,
+           host='localhost',
+           port=29921,
+           backend='nccl')

     check_operations()
     check_layer()
     gpc.destroy()
+    torch.cuda.empty_cache()
+
+
+@pytest.mark.dist
+def test_2d():
+    world_size = 4
+    run_func = partial(check_layer_and_operation, world_size=world_size)
+    mp.spawn(run_func, nprocs=world_size)


 if __name__ == '__main__':
tests/test_layers/test_2p5d/checks_2p5d/__init__.py  (new, empty file)
@@ -6,7 +6,7 @@ from colossalai.nn import (Linear2p5D, LayerNorm2p5D, TransformerSelfAttention2p
                            TransformerLayer2p5D)
 from colossalai.utils import get_current_device
 from colossalai.utils import print_rank_0
-from common import *
+from .common import *


 def check_linear():
@@ -6,7 +6,7 @@ from colossalai.nn.layer.parallel_2p5d._operation import Matmul_AB_2p5D, Matmul_
                                                          Matmul_ATB_2p5D
 from colossalai.utils import get_current_device
 from colossalai.utils import print_rank_0
-from common import *
+from .common import *


 def check_AB():
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-python -m torch.distributed.launch test_2p5d.py --nproc_per_node 8 --host $HOST --port 29516 --world_size 8
@@ -1,9 +1,13 @@
 import pytest
 import torch
+import torch.multiprocessing as mp

 from colossalai.core import global_context as gpc
-from colossalai.initialize import launch, get_default_parser
-from test_layer import check_linear, check_layernorm, check_attention, check_mlp, check_transformerlayer
-from test_operation import check_AB, check_ABT, check_ATB
+from colossalai.initialize import launch
+from checks_2p5d.check_layer_2p5d import check_linear, check_layernorm, check_attention, check_mlp, check_transformerlayer
+from checks_2p5d.check_operation_2p5d import check_AB, check_ABT, check_ATB
+from functools import partial


 CONFIG = dict(
     parallel=dict(
@@ -27,20 +31,25 @@ def check_layer():
     check_transformerlayer()


-@pytest.mark.dist
-@pytest.mark.skip("This test should be invoked by test.sh in the same folder as it runs on multiple gpus")
-def test_2p5d():
-    parser = get_default_parser()
-    args = parser.parse_args()
+def check_layer_and_operation(rank, world_size):
     launch(config=CONFIG,
-           rank=args.rank,
-           world_size=args.world_size,
-           host=args.host,
-           port=args.port,
-           backend=args.backend)
-    check_layer()
+           rank=rank,
+           world_size=world_size,
+           host='localhost',
+           port=29922,
+           backend='nccl')

     check_operations()
     check_layer()
     gpc.destroy()
+    torch.cuda.empty_cache()
+
+
+@pytest.mark.dist
+def test_2p5d():
+    world_size = 8
+    run_func = partial(check_layer_and_operation, world_size=world_size)
+    mp.spawn(run_func, nprocs=world_size)


 if __name__ == '__main__':
tests/test_layers/test_3d/checks_3d/__init__.py  (new, empty file)
@@ -13,7 +13,7 @@ from colossalai.utils import get_current_device, print_rank_0
 from colossalai.nn.layer.parallel_3d._utils import get_parallel_mode_from_env
 from colossalai.constants import INPUT_GROUP_3D, WEIGHT_GROUP_3D, OUTPUT_GROUP_3D

-from common import *
+from .common import *


 def check_linear():
@@ -7,7 +7,7 @@ from colossalai.logging import get_dist_logger
 from colossalai.nn.layer.parallel_3d._operation import *
 from colossalai.utils import get_current_device

-from common import *
+from .common import *


 def check_AB():
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-python -m torch.distributed.launch test_2d.py --nproc_per_node 8 test_3d.py --host $HOST --port 29516 --world_size 8
-
-# expected test output
-# distributed environment initialized
-# AB forward: pass
-# AB backward: pass
-# ABT forward: pass
-# ABT backward: pass
-# ATB forward: pass
-# ATB backward: pass
-# linear backward: pass
-# linear backward: pass
-# layer norm forward: pass
-# layer norm backward: pass
-# self attention forward: pass
-# self attention backward: pass
-# mlp forward: pass
-# mlp backward: pass
-# transformerlayer forward: pass
-# transformerlayer backward: pass
@@ -1,11 +1,14 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-

 import pytest
 import torch
+import torch.multiprocessing as mp
 from colossalai.initialize import launch, get_default_parser

-from test_layer import *
-from test_operation import *
+from checks_3d.check_layer_3d import *
+from checks_3d.check_operation_3d import *
 from colossalai.logging import get_dist_logger
+from functools import partial

 CONFIG = dict(parallel=dict(pipeline=1, tensor=dict(mode='3d', size=8)),
               seed=0)
@@ -38,26 +41,25 @@ def check_layer():
                 ranks=[0])


-def _test_main():
-    # init dist
-    parser = get_default_parser()
-    args = parser.parse_args()
+def check_layer_and_operation(rank, world_size):
     launch(config=CONFIG,
-           rank=args.rank,
-           world_size=args.world_size,
-           host=args.host,
-           port=args.port,
-           backend=args.backend)
-    logger = get_dist_logger()
-    logger.info('Distributed environment is initialzied.', ranks=[0])
-    torch.backends.cudnn.benchmark = True
+           rank=rank,
+           world_size=world_size,
+           host='localhost',
+           port=29923,
+           backend='nccl')

     # check operation
     # check_operations()

     # check layers
     check_layer()
+    gpc.destroy()
+    torch.cuda.empty_cache()
+
+
+@pytest.mark.dist
+def test_3d():
+    world_size = 8
+    run_func = partial(check_layer_and_operation, world_size=world_size)
+    mp.spawn(run_func, nprocs=world_size)


 if __name__ == '__main__':
-    _test_main()
+    test_3d()
@@ -1,9 +1,14 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-

 import pytest
 import torch
+import torch.multiprocessing as mp
 from colossalai.initialize import launch, get_default_parser
 from colossalai.logging import get_dist_logger
-from test_layer import *
+from checks_seq.check_layer_seq import *
+from functools import partial


 CONFIG = dict(
     parallel=dict(
@@ -17,24 +22,28 @@ def check_layer():
     check_selfattention()


-def _test_main():
+def run_check_sequence(rank, world_size):
     # init dist
-    parser = get_default_parser()
-    args = parser.parse_args()
     launch(config=CONFIG,
-           rank=args.rank,
-           world_size=args.world_size,
-           host=args.host,
-           port=args.port,
-           backend=args.backend)
+           rank=rank,
+           world_size=world_size,
+           host='localhost',
+           port=29924,
+           backend='nccl')
     logger = get_dist_logger()
     logger.info('Distributed environment is initialzied.', ranks=[0])

     torch.backends.cudnn.benchmark = True

     # check layers
     check_layer()
     torch.cuda.empty_cache()
+
+
+@pytest.mark.dist
+def test_sequence():
+    world_size = 4
+    run_func = partial(run_check_sequence, world_size=world_size)
+    mp.spawn(run_func, nprocs=world_size)


 if __name__ == '__main__':
-    _test_main()
+    test_sequence()
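Taken together, the five converted suites follow the same mp.spawn pattern while binding distinct localhost ports so they cannot collide when run back to back. A reference summary of the values set in this commit (an informal sketch, not a file in the repository):

    # Entry point -> (rendezvous port, number of spawned processes)
    CONVERTED_TESTS = {
        'test_1d':       (29920, 2),
        'test_2d':       (29921, 4),
        'test_2p5d':     (29922, 8),
        'test_3d':       (29923, 8),
        'test_sequence': (29924, 4),
    }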