[npu] add npu support for hybrid plugin and llama (#5090)

* llama 3d

* update

* fix autocast
Author: Xuanlei Zhao
Date: 2023-11-22 19:23:21 +08:00
Committed by: GitHub
Parent: aae496631c
Commit: 3acbf6d496
9 changed files with 61 additions and 40 deletions


@@ -9,6 +9,7 @@ from typing import Any, Callable, List
 import torch
 import torch.multiprocessing as mp
 from packaging import version
+from colossalai.utils.device import empty_cache, reset_max_memory_allocated, reset_peak_memory_stats, synchronize, reset_max_memory_cached, device_count


 def parameterize(argument: str, values: List[Any]) -> Callable:
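
The new import replaces the CUDA-only `torch.cuda.*` utilities with device-agnostic wrappers from `colossalai.utils.device`, so the testing helpers behave the same on CUDA GPUs and Ascend NPUs. A minimal sketch of how such a dispatcher could be built; this is an assumption for illustration, not the actual `colossalai.utils.device` implementation:

```python
# Hypothetical sketch of a device-agnostic dispatcher in the spirit of
# colossalai.utils.device; the real module may be structured differently.
import torch


def _dispatch_to_backend(fn_name: str, *args, **kwargs):
    # torch_npu patches a torch.npu namespace that mirrors the torch.cuda
    # API, so the same attribute lookup works for both backends.
    if hasattr(torch, "npu") and torch.npu.is_available():
        return getattr(torch.npu, fn_name)(*args, **kwargs)
    return getattr(torch.cuda, fn_name)(*args, **kwargs)


def empty_cache():
    return _dispatch_to_backend("empty_cache")


def synchronize():
    return _dispatch_to_backend("synchronize")


def device_count() -> int:
    return _dispatch_to_backend("device_count")
```

One import-time dispatch like this keeps call sites identical across backends, which is exactly what lets the hunks below drop their `torch.cuda.` prefixes.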
@@ -198,7 +199,7 @@ def skip_if_not_enough_gpus(min_gpus: int):
     def _wrap_func(f):
         def _execute_by_gpu_num(*args, **kwargs):
-            num_avail_gpu = torch.cuda.device_count()
+            num_avail_gpu = device_count()
             if num_avail_gpu >= min_gpus:
                 f(*args, **kwargs)
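
With `device_count()` in place, the skip decorator counts devices on whichever accelerator backend is active rather than CUDA devices only. A hypothetical usage in a test file (the `colossalai.testing` import path and test name are assumptions for illustration):

```python
# Hypothetical test: runs only when at least 4 accelerators
# (CUDA GPUs, or NPUs after this change) are visible.
from colossalai.testing import skip_if_not_enough_gpus


@skip_if_not_enough_gpus(min_gpus=4)
def test_hybrid_parallel_forward():
    ...
```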
@@ -262,11 +263,11 @@ def clear_cache_before_run():
     def _wrap_func(f):
         def _clear_cache(*args, **kwargs):
-            torch.cuda.empty_cache()
-            torch.cuda.reset_peak_memory_stats()
-            torch.cuda.reset_max_memory_allocated()
-            torch.cuda.reset_max_memory_cached()
-            torch.cuda.synchronize()
+            empty_cache()
+            reset_peak_memory_stats()
+            reset_max_memory_allocated()
+            reset_max_memory_cached()
+            synchronize()
             gc.collect()
             f(*args, **kwargs)
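
Since `clear_cache_before_run` now empties the cache and resets the peak-memory counters through the same wrappers, a decorated test is isolated from earlier allocations on either backend. A hypothetical usage (test name assumed for illustration):

```python
# Hypothetical test: caches and peak-memory statistics are reset on the
# active backend (CUDA or NPU) before the test body runs.
from colossalai.testing import clear_cache_before_run


@clear_cache_before_run()
def test_llama_forward():
    ...
```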