[npu] change device to accelerator api (#5239)

* update accelerator

* fix timer

* fix amp

* update

* fix

* update bug

* add error raise

* fix autocast

* fix set device

* remove doc accelerator

* update doc

* update doc

* update doc

* use nullcontext

* update cpu

* update null context

* change time limit for example

* update

* update

* update

* update

* [npu] polish accelerator code

---------

Co-authored-by: Xuanlei Zhao <xuanlei.zhao@gmail.com>
Co-authored-by: zxl <43881818+oahzxl@users.noreply.github.com>
Author: Hongxin Liu
Date: 2024-01-09 10:20:05 +08:00
Committer: GitHub
Parent: dd2c28a323
Commit: d202cc28c0
128 changed files with 1773 additions and 868 deletions

@@ -4,15 +4,15 @@ import torch.distributed as dist
 from torch.distributed.distributed_c10d import _get_default_group
 
 import colossalai
+from colossalai.accelerator import get_accelerator
 from colossalai.tensor import ColoParameter
 from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
-from colossalai.utils import get_current_device
 from colossalai.zero.gemini import TensorState
 from colossalai.zero.gemini.chunk import Chunk
 
 
 def dist_sum(x):
-    temp = torch.tensor([x], device=get_current_device())
+    temp = torch.tensor([x], device=get_accelerator().get_current_device())
     dist.all_reduce(temp)
     return temp.item()
@@ -66,7 +66,7 @@ def exam_chunk_basic(init_device, keep_gathered, pin_memory):
         assert my_chunk.cpu_shard.size(0) == 1024 // world_size
         assert my_chunk.device_type == "cpu"
         assert my_chunk.can_move
-        my_chunk.shard_move(get_current_device())
+        my_chunk.shard_move(get_accelerator().get_current_device())
     else:
         assert my_chunk.cuda_global_chunk.size(0) == 1024
         assert my_chunk.device_type == "cuda"
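
For reference, the pattern applied across the changed files is to obtain the current device from the accelerator API instead of the old colossalai.utils.get_current_device helper. Below is a minimal usage sketch of the new call, assuming only what the diff itself shows; the tensor creation at the end is purely illustrative.

    import torch

    from colossalai.accelerator import get_accelerator

    # get_accelerator() returns the backend object for the active device family
    # (CUDA or NPU, per the commit title); it replaces the old utils helper.
    accelerator = get_accelerator()

    # Old call removed by this commit:
    #   device = get_current_device()
    # New call used throughout the diff:
    device = accelerator.get_current_device()

    # Illustrative only: place a tensor on whichever accelerator is active.
    x = torch.ones(4, device=device)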