diff --git a/colossalai/amp/naive_amp/_fp16_optimizer.py b/colossalai/amp/naive_amp/_fp16_optimizer.py
index b1fc621c2..01842590f 100644
--- a/colossalai/amp/naive_amp/_fp16_optimizer.py
+++ b/colossalai/amp/naive_amp/_fp16_optimizer.py
@@ -14,7 +14,7 @@ from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.logging import get_dist_logger
 from colossalai.utils import (print_rank_0, copy_tensor_parallel_attributes,
-                              clip_grad_norm_fp32, count_zeros_fp32, multi_tensor_applier, is_using_pp)
+                              clip_grad_norm_fp32, count_zeros_fp32, multi_tensor_applier)


 def _zero_grad_group_helper(group, set_to_none):
diff --git a/colossalai/nn/optimizer/cpu_adam.py b/colossalai/nn/optimizer/cpu_adam.py
index 21f607c48..55f50bfdc 100644
--- a/colossalai/nn/optimizer/cpu_adam.py
+++ b/colossalai/nn/optimizer/cpu_adam.py
@@ -1,10 +1,4 @@
-# modified from https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/ops/adam/cpu_adam.py
-
-import math
 import torch
-import time
-from pathlib import Path
-import colossalai


 class CPUAdam(torch.optim.Optimizer):