Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-02 17:46:42 +00:00)
[workflow] fixed build CI (#5240)
* [workflow] fixed build CI
* polish
* polish
* polish
* polish
* polish
This commit is contained in:
@@ -1,5 +1,33 @@
|
||||
from . import custom, diffusers, timm, torchaudio, torchrec, torchvision, transformers
|
||||
import os
|
||||
from . import custom, diffusers, timm, torchaudio, torchvision, transformers
|
||||
from .executor import run_fwd, run_fwd_bwd
|
||||
from .registry import model_zoo
|
||||
|
||||
__all__ = ["model_zoo", "run_fwd", "run_fwd_bwd"]
|
||||
# We pick a subset of models for fast testing in order to reduce the total testing time
|
||||
COMMON_MODELS = [
|
||||
'custom_hanging_param_model',
|
||||
'custom_nested_model',
|
||||
'custom_repeated_computed_layers',
|
||||
'custom_simple_net',
|
||||
'diffusers_clip_text_model',
|
||||
'diffusers_auto_encoder_kl',
|
||||
'diffusers_unet2d_model',
|
||||
'timm_densenet',
|
||||
'timm_resnet',
|
||||
'timm_swin_transformer',
|
||||
'torchaudio_wav2vec2_base',
|
||||
'torchaudio_conformer',
|
||||
'transformers_bert_for_masked_lm',
|
||||
'transformers_bloom_for_causal_lm',
|
||||
'transformers_falcon_for_causal_lm',
|
||||
'transformers_chatglm_for_conditional_generation',
|
||||
'transformers_llama_for_casual_lm',
|
||||
'transformers_vit_for_masked_image_modeling',
|
||||
'transformers_mistral_for_casual_lm'
|
||||
]
|
||||
|
||||
IS_FAST_TEST = os.environ.get('FAST_TEST', '0') == '1'
|
||||
|
||||
|
||||
__all__ = ["model_zoo", "run_fwd", "run_fwd_bwd", 'COMMON_MODELS', 'IS_FAST_TEST']
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable
|
||||
from typing import Callable, List, Union
|
||||
|
||||
__all__ = ["ModelZooRegistry", "ModelAttribute", "model_zoo"]
|
||||
|
||||
@@ -61,7 +61,7 @@ class ModelZooRegistry(dict):
|
||||
"""
|
||||
self[name] = (model_fn, data_gen_fn, output_transform_fn, loss_fn, model_attribute)
|
||||
|
||||
def get_sub_registry(self, keyword: str):
|
||||
def get_sub_registry(self, keyword: Union[str, List[str]]):
|
||||
"""
|
||||
Get a sub registry with models that contain the keyword.
|
||||
|
||||
@@ -70,12 +70,15 @@ class ModelZooRegistry(dict):
|
||||
"""
|
||||
new_dict = dict()
|
||||
|
||||
if isinstance(keyword, str):
|
||||
keyword_list = [keyword]
|
||||
else:
|
||||
keyword_list = keyword
|
||||
assert isinstance(keyword_list, (list, tuple))
|
||||
|
||||
for k, v in self.items():
|
||||
if keyword == "transformers_gpt":
|
||||
if keyword in k and not "gptj" in k: # ensure GPT2 does not retrieve GPTJ models
|
||||
new_dict[k] = v
|
||||
else:
|
||||
if keyword in k:
|
||||
for kw in keyword_list:
|
||||
if kw in k:
|
||||
new_dict[k] = v
|
||||
|
||||
assert len(new_dict) > 0, f"No model found with keyword {keyword}"
|
||||
|
@@ -13,7 +13,7 @@ from colossalai.lazy.lazy_init import LazyInitContext
|
||||
from colossalai.nn.optimizer import HybridAdam
|
||||
from colossalai.tensor.colo_parameter import ColoParameter
|
||||
from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
|
||||
from tests.kit.model_zoo import model_zoo
|
||||
from tests.kit.model_zoo import model_zoo, COMMON_MODELS, IS_FAST_TEST
|
||||
|
||||
|
||||
def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn, zero_size, tp_size) -> Optional[str]:
|
||||
@@ -66,7 +66,7 @@ def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn, zero_size, t
|
||||
# @parameterize('init_method', ['lazy', 'none', 'colo'])
|
||||
|
||||
|
||||
@parameterize("subset", ["torchvision", "transformers", "diffusers"])
|
||||
@parameterize("subset", [COMMON_MODELS] if IS_FAST_TEST else ["torchvision", "transformers", "diffusers"])
|
||||
@parameterize("init_method", ["none"])
|
||||
@parameterize("zero_size", [2])
|
||||
@parameterize("tp_size", [2])
|
||||
|
@@ -11,7 +11,7 @@ from colossalai.booster.plugin import LowLevelZeroPlugin
|
||||
|
||||
# from colossalai.nn.optimizer import HybridAdam
|
||||
from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
|
||||
from tests.kit.model_zoo import model_zoo
|
||||
from tests.kit.model_zoo import model_zoo, IS_FAST_TEST, COMMON_MODELS
|
||||
|
||||
# These models are not compatible with AMP
|
||||
_AMP_ERR_MODELS = ["timm_convit", "deepfm_interactionarch"]
|
||||
@@ -62,7 +62,12 @@ def check_low_level_zero_plugin(stage: int, early_stop: bool = True):
|
||||
ignore_models = _AMP_ERR_MODELS + _LOW_LEVEL_ZERO_ERR_MODELS + _STUCK_MODELS
|
||||
skipped_models = []
|
||||
|
||||
for name, (model_fn, data_gen_fn, output_transform_fn, _, _) in model_zoo.items():
|
||||
if IS_FAST_TEST:
|
||||
registry = model_zoo.get_sub_registry(COMMON_MODELS)
|
||||
else:
|
||||
registry = model_zoo
|
||||
|
||||
for name, (model_fn, data_gen_fn, output_transform_fn, _, _) in registry.items():
|
||||
# FIXME(ver217): fix these models
|
||||
if name in ignore_models:
|
||||
skipped_models.append(name)
|
||||
|
@@ -11,7 +11,7 @@ from colossalai.booster import Booster
|
||||
from colossalai.booster.plugin import TorchDDPPlugin
|
||||
from colossalai.interface import OptimizerWrapper
|
||||
from colossalai.testing import rerun_if_address_is_in_use, spawn
|
||||
from tests.kit.model_zoo import model_zoo
|
||||
from tests.kit.model_zoo import model_zoo, IS_FAST_TEST, COMMON_MODELS
|
||||
|
||||
|
||||
def run_fn(model_fn, data_gen_fn, output_transform_fn):
|
||||
@@ -40,7 +40,12 @@ def run_fn(model_fn, data_gen_fn, output_transform_fn):
|
||||
|
||||
|
||||
def check_torch_ddp_plugin():
|
||||
for name, (model_fn, data_gen_fn, output_transform_fn, _, _) in model_zoo.items():
|
||||
if IS_FAST_TEST:
|
||||
registry = model_zoo.get_sub_registry(COMMON_MODELS)
|
||||
else:
|
||||
registry = model_zoo
|
||||
|
||||
for name, (model_fn, data_gen_fn, output_transform_fn, _, _) in registry.items():
|
||||
if name == "dlrm_interactionarch":
|
||||
continue
|
||||
run_fn(model_fn, data_gen_fn, output_transform_fn)
|
||||
|
@@ -12,7 +12,7 @@ if version.parse(torch.__version__) >= version.parse("1.12.0"):
|
||||
|
||||
from colossalai.interface import OptimizerWrapper
|
||||
from colossalai.testing import rerun_if_address_is_in_use, spawn
|
||||
from tests.kit.model_zoo import model_zoo
|
||||
from tests.kit.model_zoo import model_zoo, IS_FAST_TEST, COMMON_MODELS
|
||||
|
||||
|
||||
# test basic fsdp function
|
||||
@@ -42,7 +42,12 @@ def run_fn(model_fn, data_gen_fn, output_transform_fn):
|
||||
|
||||
|
||||
def check_torch_fsdp_plugin():
|
||||
for name, (model_fn, data_gen_fn, output_transform_fn, _, _) in model_zoo.items():
|
||||
if IS_FAST_TEST:
|
||||
registry = model_zoo.get_sub_registry(COMMON_MODELS)
|
||||
else:
|
||||
registry = model_zoo
|
||||
|
||||
for name, (model_fn, data_gen_fn, output_transform_fn, _, _) in registry.items():
|
||||
if any(
|
||||
element in name
|
||||
for element in [
|
||||
|
@@ -7,6 +7,7 @@ from transformers import LlamaForCausalLM
|
||||
from utils import shared_tempdir
|
||||
|
||||
import colossalai
|
||||
from colossalai.testing import skip_if_not_enough_gpus
|
||||
from colossalai.booster import Booster
|
||||
from colossalai.booster.plugin import GeminiPlugin
|
||||
from colossalai.lazy import LazyInitContext
|
||||
@@ -68,7 +69,7 @@ def exam_state_dict_with_origin(placement_config, model_name, use_safetensors: b
|
||||
@clear_cache_before_run()
|
||||
@parameterize("placement_config", OPTIM_PLACEMENT_CONFIGS)
|
||||
@parameterize("shard", [True, False])
|
||||
@parameterize("model_name", ["transformers_gpt"])
|
||||
@parameterize("model_name", ["transformers_llama_for_casual_lm"])
|
||||
@parameterize("size_per_shard", [32])
|
||||
@parameterize("tp_size", [1, 2])
|
||||
@parameterize("zero_size", [2])
|
||||
@@ -156,13 +157,12 @@ def run_dist(rank, world_size, port):
|
||||
|
||||
|
||||
@pytest.mark.dist
|
||||
@pytest.mark.parametrize("world_size", [4])
|
||||
@rerun_if_address_is_in_use()
|
||||
def test_gemini_ckpIO(world_size):
|
||||
spawn(run_dist, world_size)
|
||||
def test_gemini_ckpIO():
|
||||
spawn(run_dist, 4)
|
||||
|
||||
@pytest.mark.largedist
|
||||
@pytest.mark.parametrize("world_size", [8])
|
||||
@skip_if_not_enough_gpus(min_gpus=8)
|
||||
@rerun_if_address_is_in_use()
|
||||
def test_gemini_ckpIO_3d(world_size):
|
||||
spawn(run_dist, world_size)
|
||||
def test_gemini_ckpIO_3d():
|
||||
spawn(run_dist, 8)
|
@@ -20,7 +20,7 @@ from tests.kit.model_zoo import model_zoo
|
||||
|
||||
@clear_cache_before_run()
|
||||
@parameterize("shard", [False, True])
|
||||
@parameterize("model_name", ["transformers_gpt"])
|
||||
@parameterize("model_name", ["transformers_llama_for_casual_lm"])
|
||||
def exam_torch_load_from_gemini(shard: bool, model_name: str):
|
||||
(model_fn, data_gen_fn, output_transform_fn, _, _) = next(iter(model_zoo.get_sub_registry(model_name).values()))
|
||||
criterion = lambda x: x.mean()
|
||||
|
@@ -40,7 +40,7 @@ else:
|
||||
|
||||
@clear_cache_before_run()
|
||||
@parameterize("shard", [True, False])
|
||||
@parameterize("model_name", ["transformers_gpt"])
|
||||
@parameterize("model_name", ["transformers_llama_for_casual_lm"])
|
||||
@parameterize("size_per_shard", [32])
|
||||
@parameterize("test_config", TEST_CONFIGS)
|
||||
def exam_state_dict(shard: bool, model_name: str, size_per_shard: int, test_config: dict):
|
||||
|
@@ -18,7 +18,7 @@ from tests.kit.model_zoo import model_zoo
|
||||
|
||||
|
||||
@clear_cache_before_run()
|
||||
@parameterize("model_name", ["transformers_gpt"])
|
||||
@parameterize("model_name", ["transformers_llama_for_casual_lm"])
|
||||
@parameterize("plugin_type", ["ddp", "zero", "gemini"])
|
||||
def exam_from_pretrained(plugin_type: str, model_name: str, shard=True, size_per_shard=32):
|
||||
(model_fn, data_gen_fn, output_transform_fn, loss_fn, _) = next(
|
||||
|
@@ -1,11 +1,11 @@
|
||||
import pytest
|
||||
from lazy_init_utils import SUPPORT_LAZY, check_lazy_init
|
||||
|
||||
from tests.kit.model_zoo import model_zoo
|
||||
from tests.kit.model_zoo import model_zoo, IS_FAST_TEST, COMMON_MODELS
|
||||
|
||||
|
||||
@pytest.mark.skipif(not SUPPORT_LAZY, reason="requires torch >= 1.12.0")
|
||||
@pytest.mark.parametrize("subset", ["torchvision", "diffusers", "timm", "transformers", "torchaudio", "deepfm", "dlrm"])
|
||||
@pytest.mark.parametrize("subset", [COMMON_MODELS] if IS_FAST_TEST else ["torchvision", "diffusers", "timm", "transformers", "torchaudio", "deepfm", "dlrm"])
|
||||
@pytest.mark.parametrize("default_device", ["cpu", "cuda"])
|
||||
def test_torchvision_models_lazy_init(subset, default_device):
|
||||
sub_model_zoo = model_zoo.get_sub_registry(subset)
|
||||
|
Reference in New Issue
Block a user