mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-16 14:41:53 +00:00
[npu] change device to accelerator api (#5239)
* update accelerator * fix timer * fix amp * update * fix * update bug * add error raise * fix autocast * fix set device * remove doc accelerator * update doc * update doc * update doc * use nullcontext * update cpu * update null context * change time limit for example * update * update * update * update * [npu] polish accelerator code --------- Co-authored-by: Xuanlei Zhao <xuanlei.zhao@gmail.com> Co-authored-by: zxl <43881818+oahzxl@users.noreply.github.com>
This commit is contained in:
@@ -20,11 +20,11 @@ from tqdm.auto import tqdm
|
||||
from transformers import AutoTokenizer, PretrainedConfig
|
||||
|
||||
import colossalai
|
||||
from colossalai.accelerator import get_accelerator
|
||||
from colossalai.booster import Booster
|
||||
from colossalai.booster.plugin import GeminiPlugin, LowLevelZeroPlugin, TorchDDPPlugin
|
||||
from colossalai.logging import disable_existing_loggers, get_dist_logger
|
||||
from colossalai.nn.optimizer import HybridAdam
|
||||
from colossalai.utils import get_current_device
|
||||
|
||||
disable_existing_loggers()
|
||||
logger = get_dist_logger()
|
||||
@@ -386,7 +386,7 @@ def main(args):
|
||||
cur_class_images = len(list(class_images_dir.iterdir()))
|
||||
|
||||
if cur_class_images < args.num_class_images:
|
||||
torch_dtype = torch.float16 if get_current_device() == "cuda" else torch.float32
|
||||
torch_dtype = torch.float16 if get_accelerator().get_current_device() == "cuda" else torch.float32
|
||||
pipeline = DiffusionPipeline.from_pretrained(
|
||||
args.pretrained_model_name_or_path,
|
||||
torch_dtype=torch_dtype,
|
||||
@@ -401,7 +401,7 @@ def main(args):
|
||||
sample_dataset = PromptDataset(args.class_prompt, num_new_images)
|
||||
sample_dataloader = torch.utils.data.DataLoader(sample_dataset, batch_size=args.sample_batch_size)
|
||||
|
||||
pipeline.to(get_current_device())
|
||||
pipeline.to(get_accelerator().get_current_device())
|
||||
|
||||
for example in tqdm(
|
||||
sample_dataloader,
|
||||
@@ -578,8 +578,8 @@ def main(args):
|
||||
# Move text_encoder and vae to gpu.
|
||||
# For mixed precision training we cast the text_encoder and vae weights to half-precision
|
||||
# as these models are only used for inference, keeping weights in full precision is not required.
|
||||
vae.to(get_current_device(), dtype=weight_dtype)
|
||||
text_encoder.to(get_current_device(), dtype=weight_dtype)
|
||||
vae.to(get_accelerator().get_current_device(), dtype=weight_dtype)
|
||||
text_encoder.to(get_accelerator().get_current_device(), dtype=weight_dtype)
|
||||
|
||||
# We need to recalculate our total training steps as the size of the training dataloader may have changed.
|
||||
num_update_steps_per_epoch = math.ceil(len(train_dataloader))
|
||||
@@ -613,7 +613,7 @@ def main(args):
|
||||
torch.cuda.reset_peak_memory_stats()
|
||||
# Move batch to gpu
|
||||
for key, value in batch.items():
|
||||
batch[key] = value.to(get_current_device(), non_blocking=True)
|
||||
batch[key] = value.to(get_accelerator().get_current_device(), non_blocking=True)
|
||||
|
||||
# Convert images to latent space
|
||||
optimizer.zero_grad()
|
||||
|
@@ -21,13 +21,13 @@ from tqdm.auto import tqdm
|
||||
from transformers import AutoTokenizer, PretrainedConfig
|
||||
|
||||
import colossalai
|
||||
from colossalai.accelerator import get_accelerator
|
||||
from colossalai.booster import Booster
|
||||
from colossalai.booster.plugin import GeminiPlugin, LowLevelZeroPlugin, TorchDDPPlugin
|
||||
from colossalai.legacy.context.parallel_mode import ParallelMode
|
||||
from colossalai.legacy.core import global_context as gpc
|
||||
from colossalai.logging import disable_existing_loggers, get_dist_logger
|
||||
from colossalai.nn.optimizer import HybridAdam
|
||||
from colossalai.utils import get_current_device
|
||||
|
||||
disable_existing_loggers()
|
||||
logger = get_dist_logger()
|
||||
@@ -385,7 +385,7 @@ def main(args):
|
||||
cur_class_images = len(list(class_images_dir.iterdir()))
|
||||
|
||||
if cur_class_images < args.num_class_images:
|
||||
torch_dtype = torch.float16 if get_current_device() == "cuda" else torch.float32
|
||||
torch_dtype = torch.float16 if get_accelerator().get_current_device() == "cuda" else torch.float32
|
||||
pipeline = DiffusionPipeline.from_pretrained(
|
||||
args.pretrained_model_name_or_path,
|
||||
torch_dtype=torch_dtype,
|
||||
@@ -400,7 +400,7 @@ def main(args):
|
||||
sample_dataset = PromptDataset(args.class_prompt, num_new_images)
|
||||
sample_dataloader = torch.utils.data.DataLoader(sample_dataset, batch_size=args.sample_batch_size)
|
||||
|
||||
pipeline.to(get_current_device())
|
||||
pipeline.to(get_accelerator().get_current_device())
|
||||
|
||||
for example in tqdm(
|
||||
sample_dataloader,
|
||||
@@ -598,8 +598,8 @@ def main(args):
|
||||
# Move text_encoder and vae to gpu.
|
||||
# For mixed precision training we cast the text_encoder and vae weights to half-precision
|
||||
# as these models are only used for inference, keeping weights in full precision is not required.
|
||||
vae.to(get_current_device(), dtype=weight_dtype)
|
||||
text_encoder.to(get_current_device(), dtype=weight_dtype)
|
||||
vae.to(get_accelerator().get_current_device(), dtype=weight_dtype)
|
||||
text_encoder.to(get_accelerator().get_current_device(), dtype=weight_dtype)
|
||||
|
||||
# We need to recalculate our total training steps as the size of the training dataloader may have changed.
|
||||
num_update_steps_per_epoch = math.ceil(len(train_dataloader))
|
||||
@@ -633,7 +633,7 @@ def main(args):
|
||||
torch.cuda.reset_peak_memory_stats()
|
||||
# Move batch to gpu
|
||||
for key, value in batch.items():
|
||||
batch[key] = value.to(get_current_device(), non_blocking=True)
|
||||
batch[key] = value.to(get_accelerator().get_current_device(), non_blocking=True)
|
||||
|
||||
# Convert images to latent space
|
||||
optimizer.zero_grad()
|
||||
|
@@ -13,12 +13,12 @@ from torch.utils.data import DataLoader
|
||||
from tqdm import tqdm
|
||||
|
||||
import colossalai
|
||||
from colossalai.accelerator import get_accelerator
|
||||
from colossalai.booster import Booster
|
||||
from colossalai.booster.plugin import GeminiPlugin, LowLevelZeroPlugin, TorchDDPPlugin
|
||||
from colossalai.booster.plugin.dp_plugin_base import DPPluginBase
|
||||
from colossalai.cluster import DistCoordinator
|
||||
from colossalai.nn.optimizer import HybridAdam
|
||||
from colossalai.utils import get_current_device
|
||||
|
||||
# ==============================
|
||||
# Prepare Hyperparameters
|
||||
@@ -53,8 +53,8 @@ def build_dataloader(batch_size: int, coordinator: DistCoordinator, plugin: DPPl
|
||||
@torch.no_grad()
|
||||
def evaluate(model: nn.Module, test_dataloader: DataLoader, coordinator: DistCoordinator) -> float:
|
||||
model.eval()
|
||||
correct = torch.zeros(1, dtype=torch.int64, device=get_current_device())
|
||||
total = torch.zeros(1, dtype=torch.int64, device=get_current_device())
|
||||
correct = torch.zeros(1, dtype=torch.int64, device=get_accelerator().get_current_device())
|
||||
total = torch.zeros(1, dtype=torch.int64, device=get_accelerator().get_current_device())
|
||||
for images, labels in test_dataloader:
|
||||
images = images.cuda()
|
||||
labels = labels.cuda()
|
||||
|
@@ -33,9 +33,10 @@ def get_data_batch(batch_size, num_labels, num_channels=3, height=224, width=224
|
||||
|
||||
|
||||
def colo_memory_cap(size_in_GB):
|
||||
from colossalai.utils import colo_device_memory_capacity, colo_set_process_memory_fraction, get_current_device
|
||||
from colossalai.accelerator import get_accelerator
|
||||
from colossalai.utils import colo_device_memory_capacity, colo_set_process_memory_fraction
|
||||
|
||||
cuda_capacity = colo_device_memory_capacity(get_current_device())
|
||||
cuda_capacity = colo_device_memory_capacity(get_accelerator().get_current_device())
|
||||
if size_in_GB * (1024**3) < cuda_capacity:
|
||||
colo_set_process_memory_fraction(size_in_GB * (1024**3) / cuda_capacity)
|
||||
print(f"Limiting GPU memory usage to {size_in_GB} GB")
|
||||
|
Reference in New Issue
Block a user