[npu] change device to accelerator api (#5239)

* update accelerator

* fix timer

* fix amp

* update

* fix

* fix bug

* add error raise

* fix autocast

* fix set device

* remove doc accelerator

* update doc

* update doc

* update doc

* use nullcontext

* update cpu

* update null context

* change time limit for example

* update

* update

* update

* update

* [npu] polish accelerator code

---------

Co-authored-by: Xuanlei Zhao <xuanlei.zhao@gmail.com>
Co-authored-by: zxl <43881818+oahzxl@users.noreply.github.com>
Author: Hongxin Liu
Date: 2024-01-09 10:20:05 +08:00
Committed by: GitHub
Parent: dd2c28a323
Commit: d202cc28c0
128 changed files with 1773 additions and 868 deletions
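For readers skimming the diff, the essence of this commit is the switch from the per-device utility module to the unified accelerator API. The minimal sketch below contrasts the two call sites; it uses only the import and the get_accelerator().get_current_device() call that appear in the diff excerpt that follows, and is illustrative rather than taken verbatim from the repository.

    import torch

    # Old call site (removed by this commit):
    #   from colossalai.utils.device import get_current_device
    #   scale = torch.tensor([1.0], device=get_current_device(), dtype=torch.float)

    # New call site (as used in the diff below): the accelerator singleton selects
    # the backend (CUDA, NPU, or CPU) and reports its current device.
    from colossalai.accelerator import get_accelerator

    scale = torch.tensor([1.0], device=get_accelerator().get_current_device(), dtype=torch.float)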


@@ -5,7 +5,7 @@ from typing import Optional
 import torch
-from colossalai.utils.device import get_current_device
+from colossalai.accelerator import get_accelerator
 from .base_grad_scaler import BaseGradScaler
@@ -37,14 +37,20 @@ class DynamicGradScaler(BaseGradScaler):
         hysteresis: int = 2,
         verbose: bool = False,
     ):
         super().__init__(initial_scale, verbose)
         if min_scale:
-            self._min_scale = torch.tensor([min_scale], device=get_current_device(), dtype=torch.float)
+            self._min_scale = torch.tensor(
+                [min_scale], device=get_accelerator().get_current_device(), dtype=torch.float
+            )
         else:
             self._min_scale = None
         if max_scale:
-            self._max_scale = torch.tensor([max_scale], device=get_current_device(), dtype=torch.float)
+            self._max_scale = torch.tensor(
+                [max_scale], device=get_accelerator().get_current_device(), dtype=torch.float
+            )
         else:
             self._max_scale = None
@@ -117,7 +123,7 @@ class DynamicGradScaler(BaseGradScaler):
         return state_dict
     def load_state_dict(self, state_dict):
-        self._scale = state_dict["scale"].to(get_current_device())
+        self._scale = state_dict["scale"].to(get_accelerator().get_current_device())
         self._growth_factor = state_dict["growth_factor"]
         self._backoff_factor = state_dict["backoff_factor"]
         self._hysteresis = state_dict["hysteresis"]
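
To show how the migrated DynamicGradScaler behaves after this change, here is a minimal usage sketch. The import path, the constructor keyword names, and the assumption that state_dict() returns the keys load_state_dict reads are inferred from the hunks above, not confirmed by the repository; treat it as illustrative.

    from colossalai.accelerator import get_accelerator
    from colossalai.amp.naive_amp.grad_scaler import DynamicGradScaler  # assumed import path

    # min_scale/max_scale tensors are now placed on the current accelerator device
    # (CUDA, NPU, or CPU) via get_accelerator().get_current_device().
    scaler = DynamicGradScaler(min_scale=1.0, max_scale=2**32, hysteresis=2, verbose=False)
    print(get_accelerator().get_current_device())

    # Checkpoint round trip: load_state_dict moves the saved scale back onto the
    # accelerator's current device, as shown in the last hunk above.
    state = scaler.state_dict()
    scaler.load_state_dict(state)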