Mirror of https://github.com/hpcaitech/ColossalAI.git
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
@@ -3,6 +3,6 @@ from .config import Config, ConfigException
 # from .moe_context import MOE_CONTEXT
 
 __all__ = [
-    'Config',
-    'ConfigException',
+    "Config",
+    "ConfigException",
 ]
@@ -5,6 +5,7 @@ import inspect
 import sys
 from importlib.machinery import SourceFileLoader
 from pathlib import Path
 
 from colossalai.logging import get_dist_logger
 
+
@@ -41,7 +42,7 @@ class Config(dict):
         self.__setattr__(key, value)
 
     def update(self, config):
-        assert isinstance(config, (Config, dict)), 'can only update dictionary or Config objects.'
+        assert isinstance(config, (Config, dict)), "can only update dictionary or Config objects."
         for k, v in config.items():
             self._add_item(k, v)
         return self
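Context for the Config hunks: Config is a dict subclass whose keys are also reachable as attributes, which is why _add_item goes through self.__setattr__. Below is a minimal standalone analogue of that pattern, not ColossalAI's exact implementation; AttrDict is an illustrative name.

class AttrDict(dict):
    """Dict whose keys are mirrored as attributes, roughly like Config above."""

    def _add_item(self, key, value):
        self[key] = value          # item access: cfg["lr"]
        setattr(self, key, value)  # attribute access: cfg.lr

    def update(self, config):
        assert isinstance(config, (AttrDict, dict)), "can only update dictionary or AttrDict objects."
        for k, v in config.items():
            self._add_item(k, v)
        return self


cfg = AttrDict().update({"lr": 1e-3, "epochs": 10})
print(cfg.lr, cfg["epochs"])  # 0.001 10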
@@ -66,11 +67,11 @@ class Config(dict):
         elif isinstance(filename, Path):
             filepath = filename.absolute()
 
-        assert filepath.exists(), f'{filename} is not found, please check your configuration path'
+        assert filepath.exists(), f"{filename} is not found, please check your configuration path"
 
         # check extension
         extension = filepath.suffix
-        assert extension == '.py', 'only .py files are supported'
+        assert extension == ".py", "only .py files are supported"
 
         # import the config as module
         remove_path = False
@@ -86,13 +87,13 @@ class Config(dict):
         config = Config()
 
         for k, v in module.__dict__.items():
-            if k.startswith('__') or inspect.ismodule(v) or inspect.isclass(v):
+            if k.startswith("__") or inspect.ismodule(v) or inspect.isclass(v):
                 continue
             else:
                 config._add_item(k, v)
 
         logger = get_dist_logger()
-        logger.debug('variables which starts with __, is a module or class declaration are omitted in config file')
+        logger.debug("variables which starts with __, is a module or class declaration are omitted in config file")
 
         # remove module
         del sys.modules[module_name]
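The two hunks above sit inside the config loader: a .py file is imported as a module and its plain top-level variables are copied into the Config. The sketch below approximates that flow with the stdlib spec API instead of SourceFileLoader; it is not the actual Config.from_file code, and load_py_config is an illustrative name.

import importlib.util
import inspect
import sys
import tempfile
from pathlib import Path


def load_py_config(filename):
    """Import a .py config file and keep its plain top-level variables."""
    filepath = Path(filename).absolute()
    assert filepath.exists(), f"{filename} is not found, please check your configuration path"
    assert filepath.suffix == ".py", "only .py files are supported"

    # import the config file as a throwaway module
    module_name = filepath.stem
    spec = importlib.util.spec_from_file_location(module_name, filepath)
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    spec.loader.exec_module(module)

    config = {}
    for k, v in module.__dict__.items():
        # dunder names, imported modules and class declarations are omitted
        if k.startswith("__") or inspect.ismodule(v) or inspect.isclass(v):
            continue
        config[k] = v

    del sys.modules[module_name]  # drop the temporary module once its variables are copied
    return config


# usage: a config file containing `batch_size = 32` yields {"batch_size": 32}
with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
    f.write("batch_size = 32\n")
print(load_py_config(f.name))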
@@ -9,14 +9,13 @@ from colossalai.legacy.tensor import ProcessGroup
 
 def _check_sanity():
     from colossalai.legacy.core import global_context as gpc
 
     if gpc.tensor_parallel_size > 1 or gpc.pipeline_parallel_size > 1:
-        raise NotImplementedError("Moe is not compatible with tensor or "
-                                  "pipeline parallel at present.")
+        raise NotImplementedError("Moe is not compatible with tensor or " "pipeline parallel at present.")
 
 
 class MoeParallelInfo:
-    """Moe parallelism information, storing parallel sizes and groups.
-    """
+    """Moe parallelism information, storing parallel sizes and groups."""
+
     def __init__(self, ep_size: int, dp_size: int):
         _check_sanity()
@@ -61,9 +60,11 @@ class MoeContext(metaclass=SingletonMeta):
         self.world_size = dist.get_world_size()
 
         from colossalai.legacy.core import global_context as gpc
-        self.max_ep_size = gpc.config.get('max_ep_size', self.world_size)
-        assert self.world_size % self.max_ep_size == 0, \
-            "Maximum expert parallel size must be a factor of the number of GPUs"
+
+        self.max_ep_size = gpc.config.get("max_ep_size", self.world_size)
+        assert (
+            self.world_size % self.max_ep_size == 0
+        ), "Maximum expert parallel size must be a factor of the number of GPUs"
         self.min_dp_size = self.world_size // self.max_ep_size
 
         # Enabling kernel optimization may raise error in some cases
@@ -71,6 +72,7 @@ class MoeContext(metaclass=SingletonMeta):
         self.use_kernel_optim = use_kernel_optim
 
         from .random import moe_set_seed
+
         moe_set_seed(seed)
         self.has_setup = True
 
@@ -88,11 +90,13 @@ class MoeContext(metaclass=SingletonMeta):
             number of local experts, the MoeParallelInfo of the current ep_size
         """
 
-        gt_flag = num_experts % self.max_ep_size == 0    # check whether num_experts is greater
-        lt_flag = self.max_ep_size % num_experts == 0    # check whether num_experts is less
+        gt_flag = num_experts % self.max_ep_size == 0  # check whether num_experts is greater
+        lt_flag = self.max_ep_size % num_experts == 0  # check whether num_experts is less
 
-        assert gt_flag or lt_flag, "Automatic experts placement dose not not support expert number" \
-            " is not a multiple of ep size or vice versa."
+        assert gt_flag or lt_flag, (
+            "Automatic experts placement dose not not support expert number"
+            " is not a multiple of ep size or vice versa."
+        )
 
         # If the number of experts is greater than maximum expert parallel size. a.k.a ep_size,
         # there are multiple experts in each GPU and each GPU has different experts
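The gt_flag/lt_flag pair above encodes the placement rule described by the surrounding code: the expert count must be a multiple of the maximum expert-parallel size, or vice versa, and that size must divide the GPU count. Below is a standalone sketch of the arithmetic only; plan_expert_placement is an illustrative helper, not ColossalAI's get_info.

def plan_expert_placement(num_experts: int, max_ep_size: int, world_size: int):
    """Illustrative version of the expert placement rule in the hunk above."""
    assert world_size % max_ep_size == 0, "Maximum expert parallel size must be a factor of the number of GPUs"

    gt_flag = num_experts % max_ep_size == 0  # num_experts is a multiple of max_ep_size
    lt_flag = max_ep_size % num_experts == 0  # max_ep_size is a multiple of num_experts
    assert gt_flag or lt_flag, "expert number must be a multiple of ep size or vice versa"

    if gt_flag:
        # more experts than the expert-parallel group: several different experts per GPU
        ep_size = max_ep_size
        num_local_experts = num_experts // ep_size
    else:
        # fewer experts than the group: shrink ep_size so each expert still owns one GPU
        ep_size = num_experts
        num_local_experts = 1
    dp_size = world_size // ep_size  # remaining ranks replicate each expert (data parallel)
    return ep_size, dp_size, num_local_experts


# e.g. 8 GPUs with max_ep_size=8 and 16 experts -> ep_size=8, dp_size=1, 2 experts per GPU
print(plan_expert_placement(num_experts=16, max_ep_size=8, world_size=8))  # (8, 1, 2)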
@@ -16,6 +16,7 @@ class SingletonMeta(type):
             instance = super().__call__(*args, **kwargs)
             cls._instances[cls] = instance
         else:
-            assert len(args) == 0 and len(
-                kwargs) == 0, f'{cls.__name__} is a singleton class and a instance has been created.'
+            assert (
+                len(args) == 0 and len(kwargs) == 0
+            ), f"{cls.__name__} is a singleton class and a instance has been created."
         return cls._instances[cls]
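The assert rewritten above guards repeated construction of a singleton: after the first instantiation the metaclass returns the cached object and refuses new constructor arguments. A minimal sketch consistent with the lines shown in the hunk (the real class may differ outside it; GlobalState is an illustrative user class, not from the diff):

class SingletonMeta(type):
    """Metaclass that caches one instance per class, as in the hunk above."""

    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        else:
            assert (
                len(args) == 0 and len(kwargs) == 0
            ), f"{cls.__name__} is a singleton class and an instance has been created."
        return cls._instances[cls]


class GlobalState(metaclass=SingletonMeta):
    def __init__(self, value=0):
        self.value = value


a = GlobalState(42)
b = GlobalState()       # returns the same object; new arguments would trigger the assert
print(a is b, b.value)  # True 42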