mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-08 04:24:47 +00:00
[legacy] clean up legacy code (#4743)
* [legacy] remove outdated codes of pipeline (#4692)
* [legacy] remove cli of benchmark and update optim (#4690)
* [legacy] remove cli of benchmark and update optim
* [doc] fix cli doc test
* [legacy] fix engine clip grad norm
* [legacy] remove outdated colo tensor (#4694)
* [legacy] remove outdated colo tensor
* [test] fix test import
* [legacy] move outdated zero to legacy (#4696)
* [legacy] clean up utils (#4700)
* [legacy] clean up utils
* [example] update examples
* [legacy] clean up amp
* [legacy] fix amp module
* [legacy] clean up gpc (#4742)
* [legacy] clean up context
* [legacy] clean core, constants and global vars
* [legacy] refactor initialize
* [example] fix examples ci
* [example] fix examples ci
* [legacy] fix tests
* [example] fix gpt example
* [example] fix examples ci
* [devops] fix ci installation
* [example] fix examples ci
This commit is contained in:
18
colossalai/legacy/context/random/__init__.py
Normal file
18
colossalai/legacy/context/random/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from ._helper import (
|
||||
add_seed,
|
||||
get_current_mode,
|
||||
get_seeds,
|
||||
get_states,
|
||||
moe_set_seed,
|
||||
reset_seeds,
|
||||
seed,
|
||||
set_mode,
|
||||
set_seed_states,
|
||||
sync_states,
|
||||
with_seed,
|
||||
)
|
||||
|
||||
# Public API of the package: every name re-exported from ``._helper``.
__all__ = [
    'seed',
    'set_mode',
    'with_seed',
    'add_seed',
    'get_seeds',
    'get_states',
    'get_current_mode',
    'set_seed_states',
    'sync_states',
    'moe_set_seed',
    'reset_seeds',
]
|
172
colossalai/legacy/context/random/_helper.py
Normal file
172
colossalai/legacy/context/random/_helper.py
Normal file
@@ -0,0 +1,172 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
import functools
|
||||
from contextlib import contextmanager
|
||||
|
||||
import torch.cuda
|
||||
from torch import Tensor
|
||||
|
||||
from ..parallel_mode import ParallelMode
|
||||
from .seed_manager import SeedManager
|
||||
|
||||
# Module-level SeedManager instance; every helper function in this module
# reads from or writes to this single object.
_SEED_MANAGER = SeedManager()
|
||||
|
||||
|
||||
def get_seeds():
    """Return the seeds registered with the module-level seed manager.

    Returns:
        dict: The manager's own ``seeds`` mapping (the live object, not a
            copy), keyed by parallel mode.
    """
    seed_table = _SEED_MANAGER.seeds
    return seed_table
|
||||
|
||||
|
||||
def get_states(copy=False):
    """Return the per-mode RNG states held by the module-level seed manager.

    Args:
        copy (bool, optional): If true, a new dict is built whose values are
            produced by calling ``clone()`` on each stored state. If false,
            the manager's own mapping is returned. Defaults to False.

    Returns:
        dict: Mapping from parallel mode to its stored state.
    """
    if not copy:
        return _SEED_MANAGER.seed_states

    # Clone each state so the caller cannot mutate what the manager holds.
    return {mode: rng_state.clone() for mode, rng_state in _SEED_MANAGER.seed_states.items()}
|
||||
|
||||
|
||||
def get_current_mode():
    """Return the parallel mode that the seed manager currently has active.

    Returns:
        :class:`colossalai.legacy.context.ParallelMode` or None: The mode most
            recently passed to ``set_mode``, or ``None`` if no mode has been
            selected since the manager was created or reset. This is the mode
            itself, not an RNG state tensor.
    """
    return _SEED_MANAGER.current_mode
|
||||
|
||||
|
||||
def add_seed(parallel_mode: ParallelMode, seed: int, overwrite: bool = False):
    """Register ``seed`` for ``parallel_mode`` with the module-level seed manager.

    Args:
        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
        seed (int): The seed to register.
        overwrite (bool, optional): Passed through to the manager; when it is not ``False``,
            an already registered seed for ``parallel_mode`` is replaced instead of rejected.
            Defaults to False.

    Raises:
        AssertionError: If ``parallel_mode`` is not an instance of
            :class:`colossalai.legacy.context.ParallelMode`, or if a seed for ``parallel_mode``
            has already been added and ``overwrite`` is ``False``.

    Note:
        The parallel_mode should be a member of ``ParallelMode``. More details about ``ParallelMode`` can be found
        in `parallel_mode <https://github.com/hpcaitech/ColossalAI/blob/main/colossalai/context/parallel_mode.py>`_.
    """
    _SEED_MANAGER.add_seed(parallel_mode=parallel_mode, seed=seed, overwrite=overwrite)
|
||||
|
||||
|
||||
def set_mode(parallel_mode: ParallelMode):
    """Make ``parallel_mode`` the active mode of the module-level seed manager.

    Args:
        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The mode to switch to.

    Note:
        The parallel_mode should be a member of ``ParallelMode``. More details about ``ParallelMode`` can be found
        in `parallel_mode <https://github.com/hpcaitech/ColossalAI/blob/main/colossalai/context/parallel_mode.py>`_.
    """
    manager = _SEED_MANAGER
    manager.set_mode(parallel_mode)
|
||||
|
||||
|
||||
def set_seed_states(parallel_mode: ParallelMode, state: Tensor):
    """Replace the state stored for ``parallel_mode`` in the module-level seed manager.

    Args:
        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The mode whose state is replaced.
        state (:class:`torch.Tensor`): The state to store.

    Raises:
        AssertionError: If ``parallel_mode`` has no entry in the seed manager.
    """
    _SEED_MANAGER.set_state(parallel_mode=parallel_mode, state=state)
|
||||
|
||||
|
||||
def sync_states():
    """Store the current CUDA RNG state as the state of the active mode.

    Reads the mode from ``get_current_mode()`` and the state from
    ``torch.cuda.get_rng_state()``, then hands both to ``set_seed_states``.

    Raises:
        AssertionError: If the active mode has no entry in the seed manager
            (raised by the underlying ``set_state`` call).
    """
    # Arguments are evaluated left to right: the mode is read before the RNG state.
    set_seed_states(get_current_mode(), torch.cuda.get_rng_state())
|
||||
|
||||
|
||||
@contextmanager
def seed(parallel_mode: ParallelMode):
    """Context manager that temporarily switches the active seed mode.

    On entry the seed manager is switched to ``parallel_mode``; on exit,
    whether or not the body raised, it is switched back to the mode that was
    active before entry. The value bound by ``with ... as`` is whatever the
    manager's ``set_mode`` returns.

    Examples:

        >>> with seed(ParallelMode.DATA):
        >>>     output = F.dropout(input)

    Note:
        The parallel_mode should be a member of ``ParallelMode``. More details about ``ParallelMode`` can be found
        in `parallel_mode <https://github.com/hpcaitech/ColossalAI/blob/main/colossalai/context/parallel_mode.py>`_.
    """
    # Remember where we came from so the ``finally`` clause can go back.
    previous_mode = _SEED_MANAGER.current_mode
    try:
        yield _SEED_MANAGER.set_mode(parallel_mode)
    finally:
        _SEED_MANAGER.set_mode(previous_mode)
|
||||
|
||||
|
||||
def with_seed(func, parallel_mode: ParallelMode):
    """
    Wrap ``func`` so that every call runs with ``parallel_mode`` as the active seed mode.

    The wrapper switches the seed manager to ``parallel_mode``, calls ``func`` and then
    switches back to the mode that was active before the call. The previous mode is
    restored even when ``func`` raises.

    Args:
        func (Callable): The function to wrap.
        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The mode to activate
            while ``func`` runs.

    Returns:
        Callable: A wrapper with the same call signature as ``func``.

    Examples:

        >>> # wrap an existing function
        >>> def forward(input):
        >>>     return F.dropout(input)
        >>> wrapped_forward = with_seed(forward, ParallelMode.DATA)
        >>> out = wrapped_forward(input)
        >>>
        >>> # OR use it as a decorator; ``func`` is the first positional
        >>> # argument, so the mode has to be bound beforehand
        >>> @functools.partial(with_seed, parallel_mode=ParallelMode.DATA)
        >>> def forward(input):
        >>>     return F.dropout(input)
        >>> out = forward(input)

    Note:
        The parallel_mode should be a member of ``ParallelMode``. More details about ``ParallelMode`` can be found
        in `parallel_mode <https://github.com/hpcaitech/ColossalAI/blob/main/colossalai/context/parallel_mode.py>`_.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # switch mode
        previous_mode = _SEED_MANAGER.current_mode
        _SEED_MANAGER.set_mode(parallel_mode)

        try:
            # exec func
            return func(*args, **kwargs)
        finally:
            # recover the previous mode even if func raised, so one failing
            # call cannot leave the process stuck in ``parallel_mode``
            _SEED_MANAGER.set_mode(previous_mode)

    return wrapper
|
||||
|
||||
|
||||
def moe_set_seed(seed):
    """Register a rank-dependent seed for ``ParallelMode.TENSOR``.

    Does nothing when CUDA is unavailable. Otherwise the seed registered is
    ``seed`` plus this process's global rank; any seed already registered for
    ``ParallelMode.TENSOR`` is overwritten, and the result is printed.

    Args:
        seed (int): Base seed to which the global rank is added.
    """
    if not torch.cuda.is_available():
        return

    # Imported at call time, as in the original implementation.
    from colossalai.legacy.core import global_context as gpc

    global_rank = gpc.get_global_rank()
    diff_seed = seed + global_rank
    add_seed(ParallelMode.TENSOR, diff_seed, overwrite=True)
    print(f"moe seed condition: {global_rank} with tensor seed {diff_seed}", flush=True)
|
||||
|
||||
|
||||
def reset_seeds():
    """Reset the module-level seed manager.

    After this call the manager has no current mode, no seeds and no stored
    states.
    """
    manager = _SEED_MANAGER
    manager.reset()
|
89
colossalai/legacy/context/random/seed_manager.py
Normal file
89
colossalai/legacy/context/random/seed_manager.py
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
|
||||
from colossalai.legacy.context.parallel_mode import ParallelMode
|
||||
|
||||
|
||||
class SeedManager:
    """This class is a manager of all random seeds involved in the system.

    It keeps one seed and one CUDA RNG state per parallel mode and tracks
    which mode is currently active. Switching modes stores the live CUDA RNG
    state under the outgoing mode and loads the stored state of the incoming
    mode.

    Note:
        The parallel_mode should be a member of ``ParallelMode``. More details about ``ParallelMode`` can be found
        in `parallel_mode <https://github.com/hpcaitech/ColossalAI/blob/main/colossalai/context/parallel_mode.py>`_.
    """

    def __init__(self):
        # Mode whose RNG state is currently loaded; None until set_mode is called.
        self._current_mode = None
        # parallel mode -> seed passed to add_seed
        self._seeds = dict()
        # parallel mode -> CUDA RNG state captured for that mode
        self._seed_states = dict()

    @property
    def current_mode(self):
        """The currently active parallel mode, or ``None`` if none has been set."""
        return self._current_mode

    @property
    def seeds(self):
        """dict: The live mapping from parallel mode to its registered seed."""
        return self._seeds

    @property
    def seed_states(self):
        """dict: The live mapping from parallel mode to its stored RNG state."""
        return self._seed_states

    def set_state(self, parallel_mode: ParallelMode, state: Tensor):
        """Sets the state of the seed manager for `parallel_mode`.

        Args:
            parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
            state (:class:`torch.Tensor`): the state to be set.

        Raises:
            AssertionError: Raises an AssertionError if `parallel_mode` is not found in the seed manager.
        """
        assert parallel_mode in self._seed_states, f'Parallel mode {parallel_mode} is not found in the seed manager'
        self._seed_states[parallel_mode] = state

    def set_mode(self, parallel_mode: ParallelMode):
        """Sets the current mode of the seed manager.

        The live CUDA RNG state is first stored under the outgoing mode (if
        there is one), then the stored state of ``parallel_mode`` is loaded.

        Args:
            parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.

        Raises:
            KeyError: If no seed has been added for ``parallel_mode``. The
                manager is left unchanged in that case.
        """
        # Validate before touching any bookkeeping: if the target mode were
        # recorded as current first, a failed lookup would leave the manager
        # pointing at an unregistered mode, and the next switch would then
        # store an RNG state under it.
        if parallel_mode not in self._seed_states:
            raise KeyError(f'Parallel mode {parallel_mode} is not found in the seed manager')

        if self.current_mode:
            # save the current state for current mode
            self._seed_states[self._current_mode] = torch.cuda.get_rng_state()

        # set the new state for new mode; the lookup happens after the save
        # above so that re-selecting the active mode keeps its live state
        self._current_mode = parallel_mode
        torch.cuda.set_rng_state(self._seed_states[parallel_mode])

    def add_seed(self, parallel_mode: ParallelMode, seed: int, overwrite: bool = False):
        """Adds a seed to the seed manager for `parallel_mode`.

        The RNG state that results from seeding CUDA with ``seed`` is captured
        and stored; the CUDA RNG state that was live before the call is
        restored afterwards.

        Args:
            parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
            seed (int): The seed to be added.
            overwrite (bool, optional): Whether to allow overwriting a seed that has already been set.

        Raises:
            AssertionError: Raises an AssertionError if `parallel_mode` is not an instance of
                :class:`colossalai.legacy.context.ParallelMode`, or if the seed for `parallel_mode`
                has been added and `overwrite` is ``False``.
        """
        assert isinstance(parallel_mode, ParallelMode), 'A valid ParallelMode must be provided'
        if overwrite is False:
            assert parallel_mode not in self._seed_states, f'The seed for {parallel_mode} has been added'
        elif parallel_mode in self._seed_states:
            print(f"Warning: {parallel_mode} seed has been overwritten.", flush=True)

        # Seed CUDA only long enough to capture the resulting state, then put
        # the previously live state back.
        current_state = torch.cuda.get_rng_state()
        torch.cuda.manual_seed(seed)
        self._seed_states[parallel_mode] = torch.cuda.get_rng_state()
        self._seeds[parallel_mode] = seed
        torch.cuda.set_rng_state(current_state)

    def reset(self):
        """Forget the current mode and all registered seeds and states."""
        self._current_mode = None
        self._seeds = dict()
        self._seed_states = dict()
|
Reference in New Issue
Block a user