added CI for unit testing (#69)

Frank Lee
2021-12-16 10:32:08 +08:00
committed by GitHub
parent 45355a62f7
commit cd9c28e055
68 changed files with 1089 additions and 766 deletions

View File

@@ -13,7 +13,7 @@ from colossalai.utils import get_current_device, print_rank_0
 from colossalai.nn.layer.parallel_3d._utils import get_parallel_mode_from_env
 from colossalai.constants import INPUT_GROUP_3D, WEIGHT_GROUP_3D, OUTPUT_GROUP_3D
-from common import *
+from .common import *
 def check_linear():

View File

@@ -7,7 +7,7 @@ from colossalai.logging import get_dist_logger
 from colossalai.nn.layer.parallel_3d._operation import *
 from colossalai.utils import get_current_device
-from common import *
+from .common import *
 def check_AB():

View File

@@ -1,22 +0,0 @@
-#!/bin/bash
-python -m torch.distributed.launch --nproc_per_node 8 test_3d.py --host $HOST --port 29516 --world_size 8
-# expected test output
-# distributed environment initialized
-# AB forward: pass
-# AB backward: pass
-# ABT forward: pass
-# ABT backward: pass
-# ATB forward: pass
-# ATB backward: pass
-# linear forward: pass
-# linear backward: pass
-# layer norm forward: pass
-# layer norm backward: pass
-# self attention forward: pass
-# self attention backward: pass
-# mlp forward: pass
-# mlp backward: pass
-# transformerlayer forward: pass
-# transformerlayer backward: pass
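
The launch script above is deleted in favour of the pytest-driven test further down, which spawns its own worker processes instead of relying on torch.distributed.launch and environment variables. As a rough sketch of how a CI job could drive the new suite programmatically (not part of this commit; the `dist` marker name comes from the new test file below, and the test path is an assumption):

import sys

import pytest

if __name__ == '__main__':
    # Run only the tests marked `dist` on a multi-GPU runner; a CPU-only
    # runner could pass '-m', 'not dist' instead to skip them.
    sys.exit(pytest.main(['-m', 'dist', 'tests/test_layers/test_3d']))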

View File

@@ -1,11 +1,14 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
+import pytest
 import torch
+import torch.multiprocessing as mp
 from colossalai.initialize import launch, get_default_parser
-from test_layer import *
-from test_operation import *
+from checks_3d.check_layer_3d import *
+from checks_3d.check_operation_3d import *
 from colossalai.logging import get_dist_logger
+from functools import partial
 CONFIG = dict(parallel=dict(pipeline=1, tensor=dict(mode='3d', size=8)),
               seed=0)
@@ -38,26 +41,25 @@ def check_layer():
                 ranks=[0])
-def _test_main():
-    # init dist
-    parser = get_default_parser()
-    args = parser.parse_args()
+def check_layer_and_operation(rank, world_size):
     launch(config=CONFIG,
-           rank=args.rank,
-           world_size=args.world_size,
-           host=args.host,
-           port=args.port,
-           backend=args.backend)
-    logger = get_dist_logger()
-    logger.info('Distributed environment is initialzied.', ranks=[0])
-    torch.backends.cudnn.benchmark = True
+           rank=rank,
+           world_size=world_size,
+           host='localhost',
+           port=29923,
+           backend='nccl')
     # check operation
     # check_operations()
     # check layers
     check_layer()
+    gpc.destroy()
+    torch.cuda.empty_cache()
+@pytest.mark.dist
+def test_3d():
+    world_size = 8
+    run_func = partial(check_layer_and_operation, world_size=world_size)
+    mp.spawn(run_func, nprocs=world_size)
 if __name__ == '__main__':
-    _test_main()
+    test_3d()
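
For reference, a minimal standalone sketch of the spawn pattern introduced above (CPU-only; no NCCL or ColossalAI launch, and `worker` is an illustrative stand-in for check_layer_and_operation): torch.multiprocessing.spawn calls its target with the process index as the first positional argument, so binding world_size with functools.partial leaves only the rank for spawn to supply.

from functools import partial

import torch.multiprocessing as mp


def worker(rank, world_size):
    # mp.spawn passes the process index (0 .. nprocs-1) as the first
    # positional argument; world_size is pre-bound via partial.
    print(f'worker {rank} of {world_size} started')


if __name__ == '__main__':
    world_size = 4
    run_func = partial(worker, world_size=world_size)
    # launches world_size processes and blocks until they all exit
    mp.spawn(run_func, nprocs=world_size)

In the actual test, each spawned rank calls launch(...) with the nccl backend, runs the layer checks, and tears down the global context, while the @pytest.mark.dist marker lets CI select or skip these multi-GPU tests.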