Mirror of https://github.com/hpcaitech/ColossalAI.git
[npu] add npu support for hybrid plugin and llama (#5090)
* llama 3d
* update
* fix autocast
@@ -9,6 +9,7 @@ from typing import Any, Callable, List
 import torch
 import torch.multiprocessing as mp
 from packaging import version
+from colossalai.utils.device import empty_cache, reset_max_memory_allocated, reset_peak_memory_stats, synchronize, reset_max_memory_cached, device_count
 
 
 def parameterize(argument: str, values: List[Any]) -> Callable:
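The patch swaps direct torch.cuda calls for device-agnostic wrappers imported from colossalai.utils.device, so the same testing helpers run on Ascend NPU as well as CUDA. That module's implementation is not part of this diff; the following is only a minimal sketch of how such a wrapper could dispatch, assuming torch_npu exposes an npu namespace that mirrors torch.cuda:

# Hypothetical sketch in the spirit of colossalai.utils.device;
# the real module in the repository may differ.
import torch

try:
    import torch_npu  # noqa: F401  (optional Ascend NPU build of PyTorch)
    _USE_NPU = torch.npu.is_available()
except (ImportError, AttributeError):
    _USE_NPU = False

def _backend():
    # Pick the active accelerator namespace: torch.npu when available,
    # otherwise fall back to torch.cuda.
    return torch.npu if _USE_NPU else torch.cuda

def device_count() -> int:
    return _backend().device_count()

def empty_cache() -> None:
    _backend().empty_cache()

def synchronize() -> None:
    _backend().synchronize()

def reset_peak_memory_stats() -> None:
    _backend().reset_peak_memory_stats()

def reset_max_memory_allocated() -> None:
    _backend().reset_max_memory_allocated()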
@@ -198,7 +199,7 @@ def skip_if_not_enough_gpus(min_gpus: int):
 
     def _wrap_func(f):
         def _execute_by_gpu_num(*args, **kwargs):
-            num_avail_gpu = torch.cuda.device_count()
+            num_avail_gpu = device_count()
             if num_avail_gpu >= min_gpus:
                 f(*args, **kwargs)
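With device_count() coming from the wrapper, skip_if_not_enough_gpus gates tests on whichever accelerator is present rather than on CUDA alone. An illustrative usage, assuming the decorator is exported from colossalai.testing; the test body is made up for the example:

from colossalai.testing import skip_if_not_enough_gpus

@skip_if_not_enough_gpus(min_gpus=4)
def test_hybrid_parallel_forward():
    # Runs only when at least 4 devices (CUDA GPUs or NPUs) are visible;
    # otherwise the wrapped function is simply not called.
    ...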
@@ -262,11 +263,11 @@ def clear_cache_before_run():
 
     def _wrap_func(f):
         def _clear_cache(*args, **kwargs):
-            torch.cuda.empty_cache()
-            torch.cuda.reset_peak_memory_stats()
-            torch.cuda.reset_max_memory_allocated()
-            torch.cuda.reset_max_memory_cached()
-            torch.cuda.synchronize()
+            empty_cache()
+            reset_peak_memory_stats()
+            reset_max_memory_allocated()
+            reset_max_memory_cached()
+            synchronize()
             gc.collect()
             f(*args, **kwargs)
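clear_cache_before_run now empties the allocator cache, resets the memory counters, and synchronizes through the same device-agnostic wrappers, so it behaves identically on CUDA and NPU hosts. An illustrative usage, assuming the decorator is exported from colossalai.testing; the test body is an assumption, not part of the patch:

import torch
from colossalai.testing import clear_cache_before_run

@clear_cache_before_run()
def check_temporary_allocation():
    # Each invocation starts from an emptied cache and reset memory stats,
    # which keeps peak-memory measurements of separate tests independent.
    buf = torch.empty(1024, 1024, device="cuda")  # "npu" on Ascend machines
    del buf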