Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-02 01:28:31 +00:00)
[npu] change device to accelerator api (#5239)
* update accelerator
* fix timer
* fix amp
* update
* fix
* update bug
* add error raise
* fix autocast
* fix set device
* remove doc accelerator
* update doc
* update doc
* update doc
* use nullcontext
* update cpu
* update null context
* change time limit for example
* update
* update
* update
* update
* [npu] polish accelerator code

---------

Co-authored-by: Xuanlei Zhao <xuanlei.zhao@gmail.com>
Co-authored-by: zxl <43881818+oahzxl@users.noreply.github.com>
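For context, the migration in this commit boils down to one pattern: device helpers that previously lived in colossalai.utils.device are now reached through a single accelerator object. A minimal before/after sketch, using only names that appear in the diff below (per the commit title, the accelerator API is meant to cover NPU as well as CUDA backends):

    # before: device helpers tied to colossalai.utils.device
    import colossalai.utils.device as device_utils
    from colossalai.utils.device import get_current_device

    device = get_current_device()

    # after: backend-agnostic accelerator API
    from colossalai.accelerator import get_accelerator

    device = get_accelerator().get_current_device()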
@@ -6,10 +6,9 @@ import torch.distributed as dist
 import transformers

 import colossalai
-import colossalai.utils.device as device_utils
+from colossalai.accelerator import get_accelerator
 from colossalai.inference import InferenceEngine
 from colossalai.testing import clear_cache_before_run, rerun_if_address_is_in_use, spawn
-from colossalai.utils.device import get_current_device

 GIGABYTE = 1024**3
 MEGABYTE = 1024 * 1024
@@ -52,7 +51,7 @@ CONFIG_MAP = {


 def data_gen(batch_size: int = 4, seq_len: int = 512):
-    input_ids = torch.randint(10, 30000, (batch_size, seq_len), device=get_current_device())
+    input_ids = torch.randint(10, 30000, (batch_size, seq_len), device=get_accelerator().get_current_device())
     attention_mask = torch.ones_like(input_ids)
     data = dict(input_ids=input_ids, attention_mask=attention_mask)
     return data
@@ -97,9 +96,9 @@ def print_details_info(outputs, model_config, args, whole_end2end):
     msg += f"Flops: {num_parameters * num_bytes / whole_avg_latency / 1e12:.2f} TFLOPS\n"

     if torch.cuda.is_available():
-        msg += f"-------Memory Summary Device:{device_utils.current_device()}-------\n"
-        msg += f"Max memory allocated: {device_utils.max_memory_allocated() / GIGABYTE:.2f} GB\n"
-        msg += f"Max memory reserved: {device_utils.max_memory_reserved() / GIGABYTE:.2f} GB\n"
+        msg += f"-------Memory Summary Device:{get_accelerator().current_device()}-------\n"
+        msg += f"Max memory allocated: {get_accelerator().max_memory_allocated() / GIGABYTE:.2f} GB\n"
+        msg += f"Max memory reserved: {get_accelerator().max_memory_reserved() / GIGABYTE:.2f} GB\n"

     print(msg)

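The last hunk above also swaps the memory-statistics helpers onto the accelerator object. A hedged sketch of how the same summary could be printed standalone, assuming a ColossalAI install and using only the accelerator methods the hunk itself calls:

    from colossalai.accelerator import get_accelerator

    GIGABYTE = 1024**3
    accelerator = get_accelerator()

    # mirrors the benchmark's memory summary, one accelerator call per statistic
    print(f"Device: {accelerator.current_device()}")
    print(f"Max memory allocated: {accelerator.max_memory_allocated() / GIGABYTE:.2f} GB")
    print(f"Max memory reserved: {accelerator.max_memory_reserved() / GIGABYTE:.2f} GB")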
@@ -5,9 +5,9 @@ import torch.distributed as dist
 from transformers import LlamaForCausalLM, LlamaTokenizer

 import colossalai
+from colossalai.accelerator import get_accelerator
 from colossalai.inference import InferenceEngine
 from colossalai.testing import spawn
-from colossalai.utils.device import get_current_device

 INPUT_TEXTS = [
     "What is the longest river in the world?",
@@ -57,7 +57,7 @@ def run_inference(args):
     )

     inputs = tokenizer(INPUT_TEXTS, return_tensors="pt", padding="longest", max_length=max_input_len, truncation=True)
-    inputs = {k: v.to(get_current_device()) for k, v in inputs.items()}
+    inputs = {k: v.to(get_accelerator().get_current_device()) for k, v in inputs.items()}
     outputs = engine.generate(inputs)

     if rank == 0:
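In the Llama inference script, the same accelerator handle decides where the tokenized batch lives, so the example no longer hard-codes a CUDA device. A small self-contained sketch of that pattern; the illustrative tensors stand in for the tokenizer output used in the script:

    import torch
    from colossalai.accelerator import get_accelerator

    # illustrative batch; in the script this comes from the Llama tokenizer
    inputs = {
        "input_ids": torch.randint(10, 30000, (1, 16)),
        "attention_mask": torch.ones(1, 16, dtype=torch.long),
    }

    # move every tensor onto whichever device the accelerator reports
    device = get_accelerator().get_current_device()
    inputs = {k: v.to(device) for k, v in inputs.items()}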