added CI for unit testing (#69)

Frank Lee
2021-12-16 10:32:08 +08:00
committed by GitHub
parent 45355a62f7
commit cd9c28e055
68 changed files with 1089 additions and 766 deletions


@@ -6,7 +6,7 @@ from colossalai.nn import (Linear2p5D, LayerNorm2p5D, TransformerSelfAttention2p
                            TransformerLayer2p5D)
 from colossalai.utils import get_current_device
 from colossalai.utils import print_rank_0
-from common import *
+from .common import *
 
 
 def check_linear():
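The change from "from common import *" to "from .common import *" goes with the check modules being moved into a package (the test file later in this commit imports them as checks_2p5d.*), so the shared helpers resolve against the package rather than the current working directory. A minimal layout sketch, with hypothetical file names and contents inferred only from the new import paths:

# Assumed directory layout (a sketch, not the commit's actual file list):
#
#   checks_2p5d/
#       __init__.py                # marks the folder as an importable package
#       common.py                  # shared helpers pulled in via the relative import
#       check_layer_2p5d.py
#       check_operation_2p5d.py
#   test_2p5d.py                   # pytest entry point (diff further below)
#
# checks_2p5d/check_layer_2p5d.py then begins with:
#
#   from .common import *
#
# The leading dot resolves common.py inside the checks_2p5d package itself, so
# pytest can collect the tests from any working directory, whereas the old bare
# "from common import *" only resolved when run from inside that folder.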


@@ -6,7 +6,7 @@ from colossalai.nn.layer.parallel_2p5d._operation import Matmul_AB_2p5D, Matmul_
     Matmul_ATB_2p5D
 from colossalai.utils import get_current_device
 from colossalai.utils import print_rank_0
-from common import *
+from .common import *
 
 
 def check_AB():


@@ -1,3 +0,0 @@
-#!/bin/bash
-python -m torch.distributed.launch test_2p5d.py --nproc_per_node 8 --host $HOST --port 29516 --world_size 8
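With the shell launcher removed, these tests are collected and run by pytest directly, and the dist marker in the hunk below tags them as multi-GPU tests. A hedged conftest.py sketch (a hypothetical file, not part of this commit) showing how such a marker can be registered so CI can select or exclude it:

# conftest.py -- minimal sketch, assuming the "dist" marker used by the tests below
def pytest_configure(config):
    # Registering the custom marker keeps pytest from warning about an unknown
    # mark and lets CI filter runs with `pytest -m dist` or `pytest -m "not dist"`.
    config.addinivalue_line(
        "markers",
        "dist: tests that spawn multiple processes / need multiple GPUs",
    )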


@@ -1,9 +1,13 @@
 import pytest
 import torch
+import torch.multiprocessing as mp
 from colossalai.core import global_context as gpc
-from colossalai.initialize import launch, get_default_parser
-from test_layer import check_linear, check_layernorm, check_attention, check_mlp, check_transformerlayer
-from test_operation import check_AB, check_ABT, check_ATB
+from colossalai.initialize import launch
+from checks_2p5d.check_layer_2p5d import check_linear, check_layernorm, check_attention, check_mlp, check_transformerlayer
+from checks_2p5d.check_operation_2p5d import check_AB, check_ABT, check_ATB
+from functools import partial
 
 CONFIG = dict(
     parallel=dict(
@@ -27,20 +31,25 @@ def check_layer():
     check_transformerlayer()
 
 
-@pytest.mark.dist
-@pytest.mark.skip("This test should be invoked by test.sh in the same folder as it runs on multiple gpus")
-def test_2p5d():
-    parser = get_default_parser()
-    args = parser.parse_args()
+def check_layer_and_operation(rank, world_size):
     launch(config=CONFIG,
-           rank=args.rank,
-           world_size=args.world_size,
-           host=args.host,
-           port=args.port,
-           backend=args.backend)
-    check_layer()
+           rank=rank,
+           world_size=world_size,
+           host='localhost',
+           port=29922,
+           backend='nccl')
+    check_operations()
+    check_layer()
     gpc.destroy()
     torch.cuda.empty_cache()
 
 
+@pytest.mark.dist
+def test_2p5d():
+    world_size = 8
+    run_func = partial(check_layer_and_operation, world_size=world_size)
+    mp.spawn(run_func, nprocs=world_size)
+
+
 if __name__ == '__main__':
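For reference, a self-contained sketch of the launcher-free pattern adopted above: torch.multiprocessing.spawn starts one process per rank and passes the rank as the first argument, while functools.partial pins the remaining arguments. The worker is a hypothetical stand-in for check_layer_and_operation and uses the CPU-only gloo backend plus an arbitrary port so the sketch runs without GPUs; it is not the repository's code.

from functools import partial

import torch.distributed as dist
import torch.multiprocessing as mp


def _worker(rank, world_size):
    # Hypothetical stand-in for check_layer_and_operation(): each spawned
    # process joins the process group, does its checks, then tears down.
    dist.init_process_group(
        backend="gloo",                          # CPU-only backend for the sketch
        init_method="tcp://127.0.0.1:29923",     # arbitrary free port
        rank=rank,
        world_size=world_size,
    )
    assert dist.get_rank() == rank               # placeholder check
    dist.destroy_process_group()


def run_parallel_check(world_size=4):
    # partial fixes world_size; mp.spawn then calls run_func(rank) once per process.
    run_func = partial(_worker, world_size=world_size)
    mp.spawn(run_func, nprocs=world_size)


if __name__ == "__main__":
    run_parallel_check()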