diff --git a/colossalai/nn/optimizer/cpu_adam.py b/colossalai/nn/optimizer/cpu_adam.py index a8c352279..54036973e 100644 --- a/colossalai/nn/optimizer/cpu_adam.py +++ b/colossalai/nn/optimizer/cpu_adam.py @@ -19,7 +19,7 @@ class CPUAdam(NVMeOptimizer): * Parameters on GPU and gradients on GPU is allowed. * Parameters on GPU and gradients on CPU is **not** allowed. - Requires ColossalAI to be installed via ``pip install .``. + `CPUAdam` requires CUDA extensions which can be built during installation or runtime. This version of CPU Adam accelates parameters updating on CPU with SIMD. Support of AVX2 or AVX512 is required. diff --git a/colossalai/nn/optimizer/fused_adam.py b/colossalai/nn/optimizer/fused_adam.py index 2f6bde5ca..941866d55 100644 --- a/colossalai/nn/optimizer/fused_adam.py +++ b/colossalai/nn/optimizer/fused_adam.py @@ -9,8 +9,7 @@ from colossalai.utils import multi_tensor_applier class FusedAdam(torch.optim.Optimizer): """Implements Adam algorithm. - Currently GPU-only. Requires ColossalAI to be installed via - ``pip install .``. + `FusedAdam` requires CUDA extensions which can be built during installation or runtime. This version of fused Adam implements 2 fusions. diff --git a/colossalai/nn/optimizer/fused_lamb.py b/colossalai/nn/optimizer/fused_lamb.py index 891a76da7..72520064e 100644 --- a/colossalai/nn/optimizer/fused_lamb.py +++ b/colossalai/nn/optimizer/fused_lamb.py @@ -9,8 +9,7 @@ from colossalai.utils import multi_tensor_applier class FusedLAMB(torch.optim.Optimizer): """Implements LAMB algorithm. - Currently GPU-only. Requires ColossalAI to be installed via - ``pip install .``. + `FusedLAMB` requires CUDA extensions which can be built during installation or runtime. This version of fused LAMB implements 2 fusions. 
diff --git a/colossalai/nn/optimizer/fused_sgd.py b/colossalai/nn/optimizer/fused_sgd.py index 41e6d5248..468713b22 100644 --- a/colossalai/nn/optimizer/fused_sgd.py +++ b/colossalai/nn/optimizer/fused_sgd.py @@ -10,8 +10,7 @@ from colossalai.utils import multi_tensor_applier class FusedSGD(Optimizer): r"""Implements stochastic gradient descent (optionally with momentum). - Currently GPU-only. Requires ColossalAI to be installed via - ``pip install .``. + `FusedSGD` requires CUDA extensions which can be built during installation or runtime. This version of fused SGD implements 2 fusions. diff --git a/colossalai/nn/optimizer/hybrid_adam.py b/colossalai/nn/optimizer/hybrid_adam.py index 5196d4338..1d0fb92de 100644 --- a/colossalai/nn/optimizer/hybrid_adam.py +++ b/colossalai/nn/optimizer/hybrid_adam.py @@ -19,7 +19,7 @@ class HybridAdam(NVMeOptimizer): * Parameters on GPU and gradients on GPU is allowed. * Parameters on GPU and gradients on CPU is **not** allowed. - Requires ColossalAI to be installed via ``pip install .`` + `HybridAdam` requires CUDA extensions which can be built during installation or runtime. This version of Hybrid Adam is an hybrid of CPUAdam and FusedAdam.