[test] refactor tests with spawn (#3452)

* [test] added spawn decorator

* polish code

* polish code

* polish code

* polish code

* polish code

* polish code
Frank Lee
2023-04-06 14:51:35 +08:00
committed by GitHub
parent 62f4e2eb07
commit 80eba05b0a
240 changed files with 1723 additions and 2342 deletions
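
The refactor applies one mechanical change throughout: instead of building a functools.partial with a hard-coded world size and a freshly picked free port and handing it to torch.multiprocessing, each test now calls a single spawn helper from colossalai.testing. As a rough sketch only (not the actual colossalai.testing.spawn implementation; the _get_free_port helper below is hypothetical), such a helper can be shaped like this:

import socket
from functools import partial

import torch.multiprocessing as mp


def _get_free_port() -> int:
    # hypothetical helper: ask the OS for an unused TCP port on localhost
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind(('localhost', 0))
        return sock.getsockname()[1]


def spawn(func, nprocs=1, **kwargs):
    # sketch of a spawn-style test helper: bind world_size and a free port once,
    # then fan out. torch.multiprocessing.spawn passes the rank as the first
    # positional argument, so `func` is expected to accept (rank, world_size, port, ...).
    wrapped = partial(func, world_size=nprocs, port=_get_free_port(), **kwargs)
    mp.spawn(wrapped, nprocs=nprocs)

With a helper of this shape, a one-line call such as spawn(check_layer, 4) in the hunks below can stand in for the old world_size / partial / mp.spawn boilerplate.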

View File

@@ -1,18 +1,14 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
-from functools import partial
 import pytest
 import torch
-import torch.multiprocessing as mp
 from checks_1d.check_layer_1d import *
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.testing import rerun_if_address_is_in_use
-from colossalai.utils import free_port
+from colossalai.testing import rerun_if_address_is_in_use, spawn
 CONFIG = dict(parallel=dict(pipeline=dict(size=1), tensor=dict(size=4, mode='1d')),)
@@ -40,9 +36,7 @@ def check_layer(rank, world_size, port):
 @pytest.mark.dist
 @rerun_if_address_is_in_use()
 def test_1d():
-    world_size = 4
-    run_func = partial(check_layer, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(check_layer, 4)
 if __name__ == '__main__':

View File

@@ -1,22 +1,27 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
-from functools import partial
 import pytest
 import torch
-import torch.multiprocessing as mp
+from checks_2d.check_layer_2d import (
+    check_classifier_given_embed_weight,
+    check_classifier_no_given_weight,
+    check_embed,
+    check_layernorm,
+    check_linear,
+    check_loss,
+    check_patch_embed,
+    check_vocab_parallel_classifier_given_embed_weight,
+    check_vocab_parallel_classifier_no_given_weight,
+    check_vocab_parallel_embed,
+    check_vocab_parallel_loss,
+)
+from checks_2d.check_operation_2d import check_AB, check_ABT, check_ATB
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.utils import free_port
-from colossalai.testing import rerun_if_address_is_in_use
-from checks_2d.check_layer_2d import (check_classifier_given_embed_weight, check_classifier_no_given_weight,
-                                      check_embed, check_layernorm, check_linear, check_loss, check_patch_embed,
-                                      check_vocab_parallel_classifier_given_embed_weight,
-                                      check_vocab_parallel_classifier_no_given_weight, check_vocab_parallel_embed,
-                                      check_vocab_parallel_loss)
-from checks_2d.check_operation_2d import check_AB, check_ABT, check_ATB
+from colossalai.testing import rerun_if_address_is_in_use, spawn
 CONFIG = dict(parallel=dict(pipeline=dict(size=1), tensor=dict(size=4, mode='2d')),)
@@ -57,9 +62,7 @@ def check_layer_and_operation(rank, world_size, port):
 @pytest.mark.dist
 @rerun_if_address_is_in_use()
 def test_2d():
-    world_size = 4
-    run_func = partial(check_layer_and_operation, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(check_layer_and_operation, 4)
 if __name__ == '__main__':

View File

@@ -1,15 +1,12 @@
-from functools import partial
 import pytest
 import torch
-import torch.multiprocessing as mp
+from checks_2p5d.check_layer_2p5d import *
+from checks_2p5d.check_operation_2p5d import check_AB, check_ABT, check_ATB
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.utils import free_port
-from colossalai.testing import rerun_if_address_is_in_use
-from checks_2p5d.check_layer_2p5d import *
-from checks_2p5d.check_operation_2p5d import check_AB, check_ABT, check_ATB
+from colossalai.testing import rerun_if_address_is_in_use, spawn
 CONFIG = dict(parallel=dict(
     pipeline=dict(size=1),
@@ -53,9 +50,7 @@ def check_layer_and_operation(rank, world_size, port):
 @pytest.mark.dist
 @rerun_if_address_is_in_use()
 def test_2p5d():
-    world_size = 4
-    run_func = partial(check_layer_and_operation, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(check_layer_and_operation, 4)
 if __name__ == '__main__':

View File

@@ -1,19 +1,24 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
-from functools import partial
 import pytest
 import torch
-import torch.multiprocessing as mp
+from checks_3d.check_layer_3d import (
+    check_classifier_no_given_weight,
+    check_embed,
+    check_layernorm,
+    check_linear,
+    check_loss,
+    check_patch_embed,
+    check_vocab_parallel_classifier_given_embed_weight,
+    check_vocab_parallel_classifier_no_given_weight,
+    check_vocab_parallel_embed,
+    check_vocab_parallel_loss,
+)
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.utils import free_port
-from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus
-from checks_3d.check_layer_3d import (check_classifier_no_given_weight, check_embed, check_layernorm, check_linear,
-                                      check_loss, check_patch_embed, check_vocab_parallel_classifier_given_embed_weight,
-                                      check_vocab_parallel_classifier_no_given_weight, check_vocab_parallel_embed,
-                                      check_vocab_parallel_loss)
+from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus, spawn
 CONFIG = dict(
     parallel=dict(
@@ -52,9 +57,7 @@ def check_layer_and_operation(rank, world_size, port):
 @skip_if_not_enough_gpus(min_gpus=8)
 @rerun_if_address_is_in_use()
 def test_3d():
-    world_size = 8
-    run_func = partial(check_layer_and_operation, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(check_layer_and_operation, 8)
 if __name__ == '__main__':

View File

@@ -1,20 +1,21 @@
-import pytest
-from functools import partial
+import random
+from typing import List
 import numpy as np
-import random
+import pytest
 import torch
-import torch.multiprocessing as mp
 import colossalai
-from colossalai.utils import free_port
-from colossalai.testing import rerun_if_address_is_in_use
-from colossalai.tensor import ColoParameter, ProcessGroup, ShardSpec, ComputePattern, ComputeSpec, \
-    ColoTensor, ColoTensorSpec
-from colossalai.nn.parallel.layers import CachedParamMgr, CachedEmbeddingBag, ParallelCachedEmbeddingBag, EvictionStrategy, \
-    ParallelCachedEmbeddingBagTablewise, TablewiseEmbeddingBagConfig
-from typing import List
+from colossalai.nn.parallel.layers import (
+    CachedEmbeddingBag,
+    CachedParamMgr,
+    EvictionStrategy,
+    ParallelCachedEmbeddingBag,
+    ParallelCachedEmbeddingBagTablewise,
+    TablewiseEmbeddingBagConfig,
+)
+from colossalai.tensor import ColoTensor, ComputePattern, ComputeSpec, ProcessGroup, ShardSpec
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 NUM_EMBED, EMBED_DIM = 10, 8
 BATCH_SIZE = 8
@@ -44,6 +45,7 @@ def synthesize_1d_sparse_feature(
 @pytest.mark.skip
+@clear_cache_before_run()
 def test_cachemgr():
     model = torch.nn.EmbeddingBag(10000, 128)
     # 10 chunks, 5 in cuda
@@ -72,6 +74,7 @@ def test_cachemgr():
     assert mgr.cuda_available_chunk_num == 5
+@clear_cache_before_run()
 def test_reorder_with_freq():
     num_embed = 100
     chunk_size = 1
@@ -102,7 +105,8 @@ def test_reorder_with_freq():
f"offset in chunk: {offset_in_chunk}, mgr: {mgr_offsets}"
@pytest.mark.parametrize('use_LFU', [True, False])
@clear_cache_before_run()
@parameterize('use_LFU', [True, False])
def test_freq_aware_embed(use_LFU: bool):
device = torch.device('cuda', 0)
evict_strategy = EvictionStrategy.LFU if use_LFU else EvictionStrategy.DATASET
@@ -148,7 +152,8 @@ def test_freq_aware_embed(use_LFU: bool):
f"model weight: {model_weight[10:18, :8]}, reference: {ref_weight[10:18, :8]}"
@pytest.mark.parametrize('init_freq', [True, False])
@clear_cache_before_run()
@parameterize('init_freq', [True, False])
def test_lfu_strategy(init_freq: bool):
# minimal test to check behavior
Bag = CachedEmbeddingBag(5,
@@ -248,7 +253,7 @@ def run_parallel_freq_aware_embed_tablewise(rank, world_size):
     input0 [1,2,3] [6,7] []
     input1 [] [9] [13,15]
     input2 [1,5] [6,8] [11]
-    ↑ ↑ ↑
+    ↑ ↑ ↑
     rank 0 rank 0 rank 1
     in KJT format
     '''
@@ -363,8 +368,7 @@ def run_dist(rank, world_size, port):
 @pytest.mark.parametrize('world_size', [1, 4])
 @rerun_if_address_is_in_use()
 def test_parallel_freq_aware_embed(world_size):
-    run_func = partial(run_dist, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(run_dist, world_size)
 if __name__ == '__main__':

View File

@@ -1,14 +1,11 @@
-import colossalai
-import colossalai.nn as col_nn
+import pytest
 import torch
 import torch.distributed as dist
-import torch.multiprocessing as mp
-import pytest
-from colossalai.core import global_context as gpc
+import colossalai
 from colossalai.context import ParallelMode
-from colossalai.testing import rerun_if_address_is_in_use
-from functools import partial
+from colossalai.core import global_context as gpc
+from colossalai.testing import rerun_if_address_is_in_use, spawn
 CONFIG = dict(parallel=dict(tensor=dict(size=4, mode='sequence')))
@@ -121,8 +118,8 @@ def check_ring_av(rank, world_size):
         'attention output cannot match'
-def run_test(rank, world_size):
-    colossalai.launch(rank=rank, world_size=world_size, config=CONFIG, host='localhost', port=29500)
+def run_test(rank, world_size, port):
+    colossalai.launch(rank=rank, world_size=world_size, config=CONFIG, host='localhost', port=port)
     # check_ring_qk(rank, world_size)
     check_ring_av(rank, world_size)
@@ -134,9 +131,7 @@ def run_test(rank, world_size):
 @pytest.mark.dist
 @rerun_if_address_is_in_use()
 def test_sequence():
-    world_size = 4
-    run_func = partial(run_test, world_size=world_size)
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(run_test, 4)
 if __name__ == '__main__':
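
Putting the pieces together, a test refactored in this commit ends up with the shape sketched below. The identifiers spawn, rerun_if_address_is_in_use and colossalai.launch are the ones appearing in the hunks above, but the worker body itself is illustrative only, not code from this commit:

import pytest
import torch
import torch.distributed as dist

import colossalai
from colossalai.testing import rerun_if_address_is_in_use, spawn


def check_all_reduce(rank, world_size, port):
    # worker entry point: spawn() supplies rank, world_size and a free port
    colossalai.launch(config=dict(), rank=rank, world_size=world_size, host='localhost', port=port)
    x = torch.ones(1).cuda()
    dist.all_reduce(x)
    assert x.item() == world_size


@pytest.mark.dist
@rerun_if_address_is_in_use()
def test_all_reduce():
    # a single call replaces the old partial/free_port/mp.spawn boilerplate
    spawn(check_all_reduce, 4)


if __name__ == '__main__':
    test_all_reduce()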