[npu] add npu support for hybrid plugin and llama (#5090)

* llama 3d

* update

* fix autocast
Author: Xuanlei Zhao
Date: 2023-11-22 19:23:21 +08:00
Committed by: GitHub
Parent: aae496631c
Commit: 3acbf6d496
9 changed files with 61 additions and 40 deletions


@@ -9,6 +9,7 @@ from typing import Any, Callable, List
 import torch
 import torch.multiprocessing as mp
 from packaging import version
+from colossalai.utils.device import empty_cache, reset_max_memory_allocated, reset_peak_memory_stats, synchronize, reset_max_memory_cached, device_count


 def parameterize(argument: str, values: List[Any]) -> Callable:
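
The new import replaces the CUDA-only `torch.cuda.*` utilities with device-agnostic wrappers from `colossalai.utils.device`, so the testing helpers behave the same on CUDA GPUs and Ascend NPUs. A minimal sketch of how such a dispatcher could be built; this is an assumption for illustration, not the actual `colossalai.utils.device` implementation:

```python
# Hypothetical sketch of a device-agnostic dispatcher in the spirit of
# colossalai.utils.device; the real module may be structured differently.
import torch


def _dispatch_to_backend(fn_name: str, *args, **kwargs):
    # torch_npu patches a torch.npu namespace that mirrors the torch.cuda
    # API, so the same attribute lookup works for both backends.
    if hasattr(torch, "npu") and torch.npu.is_available():
        return getattr(torch.npu, fn_name)(*args, **kwargs)
    return getattr(torch.cuda, fn_name)(*args, **kwargs)


def empty_cache():
    return _dispatch_to_backend("empty_cache")


def synchronize():
    return _dispatch_to_backend("synchronize")


def device_count() -> int:
    return _dispatch_to_backend("device_count")
```

One import-time dispatch like this keeps call sites identical across backends, which is exactly what lets the hunks below drop their `torch.cuda.` prefixes.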
@@ -198,7 +199,7 @@ def skip_if_not_enough_gpus(min_gpus: int):
     def _wrap_func(f):
         def _execute_by_gpu_num(*args, **kwargs):
-            num_avail_gpu = torch.cuda.device_count()
+            num_avail_gpu = device_count()
             if num_avail_gpu >= min_gpus:
                 f(*args, **kwargs)
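
With `device_count()` in place, the skip decorator counts devices on whichever accelerator backend is active rather than CUDA devices only. A hypothetical usage in a test file (the `colossalai.testing` import path and test name are assumptions for illustration):

```python
# Hypothetical test: runs only when at least 4 accelerators
# (CUDA GPUs, or NPUs after this change) are visible.
from colossalai.testing import skip_if_not_enough_gpus


@skip_if_not_enough_gpus(min_gpus=4)
def test_hybrid_parallel_forward():
    ...
```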
@@ -262,11 +263,11 @@ def clear_cache_before_run():
     def _wrap_func(f):
         def _clear_cache(*args, **kwargs):
-            torch.cuda.empty_cache()
-            torch.cuda.reset_peak_memory_stats()
-            torch.cuda.reset_max_memory_allocated()
-            torch.cuda.reset_max_memory_cached()
-            torch.cuda.synchronize()
+            empty_cache()
+            reset_peak_memory_stats()
+            reset_max_memory_allocated()
+            reset_max_memory_cached()
+            synchronize()
             gc.collect()
             f(*args, **kwargs)
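
Since `clear_cache_before_run` now empties the cache and resets the peak-memory counters through the same wrappers, a decorated test is isolated from earlier allocations on either backend. A hypothetical usage (test name assumed for illustration):

```python
# Hypothetical test: caches and peak-memory statistics are reset on the
# active backend (CUDA or NPU) before the test body runs.
from colossalai.testing import clear_cache_before_run


@clear_cache_before_run()
def test_llama_forward():
    ...
```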