Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-22 01:48:07 +00:00
[test] refactor tests with spawn (#3452)
* [test] added spawn decorator
* polish code
* polish code
* polish code
* polish code
* polish code
* polish code
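The diff below replaces the per-test boilerplate of building a `functools.partial` and calling `torch.multiprocessing.spawn` with a single `spawn(func, nprocs)` call imported from `colossalai.testing`. The helper's implementation is not part of this diff; the following is only a minimal sketch of what such a utility might look like, assuming it allocates a free port itself and forwards `rank`, `world_size`, and `port` to the test worker.

# Hypothetical sketch only -- the real colossalai.testing.spawn is not shown in this diff.
from functools import partial

import torch.multiprocessing as mp

from colossalai.utils import free_port


def spawn(func, nprocs=1, **kwargs):
    """Start `nprocs` worker processes for a distributed test.

    Each worker receives its rank (supplied by mp.spawn), plus the world size
    and a freshly allocated port, mirroring the old partial(...) + mp.spawn(...)
    pattern that every test used to repeat by hand.
    """
    wrapped = partial(func, world_size=nprocs, port=free_port(), **kwargs)
    mp.spawn(wrapped, nprocs=nprocs)

With such a helper, a test body shrinks from three lines (`world_size = 4`, `run_func = partial(...)`, `mp.spawn(run_func, nprocs=world_size)`) to the single call `spawn(check_layer, 4)` seen throughout the hunks below.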
@@ -1,18 +1,14 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 
-from functools import partial
-
 import pytest
 import torch
-import torch.multiprocessing as mp
 from checks_1d.check_layer_1d import *
 
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.testing import rerun_if_address_is_in_use
-from colossalai.utils import free_port
+from colossalai.testing import rerun_if_address_is_in_use, spawn
 
 CONFIG = dict(parallel=dict(pipeline=dict(size=1), tensor=dict(size=4, mode='1d')),)
 
@@ -40,9 +36,7 @@ def check_layer(rank, world_size, port):
 @pytest.mark.dist
 @rerun_if_address_is_in_use()
 def test_1d():
-    world_size = 4
-    run_func = partial(check_layer, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(check_layer, 4)
 
 
 if __name__ == '__main__':
@@ -1,22 +1,27 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 
-from functools import partial
-
 import pytest
 import torch
-import torch.multiprocessing as mp
-from checks_2d.check_layer_2d import (check_classifier_given_embed_weight, check_classifier_no_given_weight,
-                                      check_embed, check_layernorm, check_linear, check_loss, check_patch_embed,
-                                      check_vocab_parallel_classifier_given_embed_weight,
-                                      check_vocab_parallel_classifier_no_given_weight, check_vocab_parallel_embed,
-                                      check_vocab_parallel_loss)
+from checks_2d.check_layer_2d import (
+    check_classifier_given_embed_weight,
+    check_classifier_no_given_weight,
+    check_embed,
+    check_layernorm,
+    check_linear,
+    check_loss,
+    check_patch_embed,
+    check_vocab_parallel_classifier_given_embed_weight,
+    check_vocab_parallel_classifier_no_given_weight,
+    check_vocab_parallel_embed,
+    check_vocab_parallel_loss,
+)
 from checks_2d.check_operation_2d import check_AB, check_ABT, check_ATB
 
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.utils import free_port
-from colossalai.testing import rerun_if_address_is_in_use
+from colossalai.testing import rerun_if_address_is_in_use, spawn
 
 CONFIG = dict(parallel=dict(pipeline=dict(size=1), tensor=dict(size=4, mode='2d')),)
 
@@ -57,9 +62,7 @@ def check_layer_and_operation(rank, world_size, port):
 @pytest.mark.dist
 @rerun_if_address_is_in_use()
 def test_2d():
-    world_size = 4
-    run_func = partial(check_layer_and_operation, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(check_layer_and_operation, 4)
 
 
 if __name__ == '__main__':
@@ -1,15 +1,12 @@
-from functools import partial
-
 import pytest
 import torch
-import torch.multiprocessing as mp
 from checks_2p5d.check_layer_2p5d import *
 from checks_2p5d.check_operation_2p5d import check_AB, check_ABT, check_ATB
 
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.utils import free_port
-from colossalai.testing import rerun_if_address_is_in_use
+from colossalai.testing import rerun_if_address_is_in_use, spawn
 
 CONFIG = dict(parallel=dict(
     pipeline=dict(size=1),
@@ -53,9 +50,7 @@ def check_layer_and_operation(rank, world_size, port):
 @pytest.mark.dist
 @rerun_if_address_is_in_use()
 def test_2p5d():
-    world_size = 4
-    run_func = partial(check_layer_and_operation, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(check_layer_and_operation, 4)
 
 
 if __name__ == '__main__':
@@ -1,19 +1,24 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
-from functools import partial
-
 import pytest
 import torch
-import torch.multiprocessing as mp
-from checks_3d.check_layer_3d import (check_classifier_no_given_weight, check_embed, check_layernorm, check_linear,
-                                      check_loss, check_patch_embed, check_vocab_parallel_classifier_given_embed_weight,
-                                      check_vocab_parallel_classifier_no_given_weight, check_vocab_parallel_embed,
-                                      check_vocab_parallel_loss)
+from checks_3d.check_layer_3d import (
+    check_classifier_no_given_weight,
+    check_embed,
+    check_layernorm,
+    check_linear,
+    check_loss,
+    check_patch_embed,
+    check_vocab_parallel_classifier_given_embed_weight,
+    check_vocab_parallel_classifier_no_given_weight,
+    check_vocab_parallel_embed,
+    check_vocab_parallel_loss,
+)
 
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.utils import free_port
-from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus
+from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus, spawn
 
 CONFIG = dict(
     parallel=dict(
@@ -52,9 +57,7 @@ def check_layer_and_operation(rank, world_size, port):
 @skip_if_not_enough_gpus(min_gpus=8)
 @rerun_if_address_is_in_use()
 def test_3d():
-    world_size = 8
-    run_func = partial(check_layer_and_operation, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(check_layer_and_operation, 8)
 
 
 if __name__ == '__main__':
@@ -1,20 +1,21 @@
-import pytest
-from functools import partial
 import random
 from typing import List
 
 import numpy as np
+import pytest
 import torch
-import torch.multiprocessing as mp
 
 import colossalai
-from colossalai.utils import free_port
-from colossalai.testing import rerun_if_address_is_in_use
-from colossalai.tensor import ColoParameter, ProcessGroup, ShardSpec, ComputePattern, ComputeSpec, \
-    ColoTensor, ColoTensorSpec
-from colossalai.nn.parallel.layers import CachedParamMgr, CachedEmbeddingBag, ParallelCachedEmbeddingBag, EvictionStrategy, \
-    ParallelCachedEmbeddingBagTablewise, TablewiseEmbeddingBagConfig
+from colossalai.nn.parallel.layers import (
+    CachedEmbeddingBag,
+    CachedParamMgr,
+    EvictionStrategy,
+    ParallelCachedEmbeddingBag,
+    ParallelCachedEmbeddingBagTablewise,
+    TablewiseEmbeddingBagConfig,
+)
+from colossalai.tensor import ColoTensor, ComputePattern, ComputeSpec, ProcessGroup, ShardSpec
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 NUM_EMBED, EMBED_DIM = 10, 8
 BATCH_SIZE = 8
@@ -44,6 +45,7 @@ def synthesize_1d_sparse_feature(
 
 
 @pytest.mark.skip
+@clear_cache_before_run()
 def test_cachemgr():
     model = torch.nn.EmbeddingBag(10000, 128)
     # 10 chunks, 5 in cuda
@@ -72,6 +74,7 @@ def test_cachemgr():
     assert mgr.cuda_available_chunk_num == 5
 
 
+@clear_cache_before_run()
 def test_reorder_with_freq():
     num_embed = 100
     chunk_size = 1
@@ -102,7 +105,8 @@ def test_reorder_with_freq():
         f"offset in chunk: {offset_in_chunk}, mgr: {mgr_offsets}"
 
 
-@pytest.mark.parametrize('use_LFU', [True, False])
+@clear_cache_before_run()
+@parameterize('use_LFU', [True, False])
 def test_freq_aware_embed(use_LFU: bool):
     device = torch.device('cuda', 0)
     evict_strategy = EvictionStrategy.LFU if use_LFU else EvictionStrategy.DATASET
@@ -148,7 +152,8 @@ def test_freq_aware_embed(use_LFU: bool):
         f"model weight: {model_weight[10:18, :8]}, reference: {ref_weight[10:18, :8]}"
 
 
-@pytest.mark.parametrize('init_freq', [True, False])
+@clear_cache_before_run()
+@parameterize('init_freq', [True, False])
 def test_lfu_strategy(init_freq: bool):
    # minimal test to check behavior
    Bag = CachedEmbeddingBag(5,
@@ -248,7 +253,7 @@ def run_parallel_freq_aware_embed_tablewise(rank, world_size):
     input0 [1,2,3] [6,7] []
     input1 [] [9] [13,15]
     input2 [1,5] [6,8] [11]
-    ↑ ↑ ↑
+      ↑       ↑       ↑
     rank 0  rank 0  rank 1
     in KJT format
     '''
@@ -363,8 +368,7 @@ def run_dist(rank, world_size, port):
 @pytest.mark.parametrize('world_size', [1, 4])
 @rerun_if_address_is_in_use()
 def test_parallel_freq_aware_embed(world_size):
-    run_func = partial(run_dist, world_size=world_size, port=free_port())
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(run_dist, world_size)
 
 
 if __name__ == '__main__':
@@ -1,14 +1,11 @@
-import colossalai
-import colossalai.nn as col_nn
-import torch
-import torch.distributed as dist
-import torch.multiprocessing as mp
-import pytest
+import pytest
+import torch
+import torch.distributed as dist
 
-from colossalai.core import global_context as gpc
-from colossalai.context import ParallelMode
-from colossalai.testing import rerun_if_address_is_in_use
-from functools import partial
+import colossalai
+import colossalai.nn as col_nn
+from colossalai.context import ParallelMode
+from colossalai.core import global_context as gpc
+from colossalai.testing import rerun_if_address_is_in_use, spawn
 
 CONFIG = dict(parallel=dict(tensor=dict(size=4, mode='sequence')))
 
@@ -121,8 +118,8 @@ def check_ring_av(rank, world_size):
         'attention output cannot match'
 
 
-def run_test(rank, world_size):
-    colossalai.launch(rank=rank, world_size=world_size, config=CONFIG, host='localhost', port=29500)
+def run_test(rank, world_size, port):
+    colossalai.launch(rank=rank, world_size=world_size, config=CONFIG, host='localhost', port=port)
 
     # check_ring_qk(rank, world_size)
     check_ring_av(rank, world_size)
@@ -134,9 +131,7 @@ def run_test(rank, world_size):
 @pytest.mark.dist
 @rerun_if_address_is_in_use()
 def test_sequence():
-    world_size = 4
-    run_func = partial(run_test, world_size=world_size)
-    mp.spawn(run_func, nprocs=world_size)
+    spawn(run_test, 4)
 
 
 if __name__ == '__main__':