[booster] added the accelerator implementation (#3159)

2025-09-12 12:47:21 +00:00 · 2023-03-20 13:59:24 +08:00
parent 1ad3a636b1
commit a9b8402d93
4 changed files with 72 additions and 5 deletions
--- a/colossalai/booster/accelerator.py
+++ b/colossalai/booster/accelerator.py
@@ -3,12 +3,52 @@ import torch.nn as nn

 __all__ = ['Accelerator']

+_supported_devices = [
+    'cpu',
+    'cuda',
+
+    # To be supported
+    # 'xpu',
+    # 'npu',
+    # 'tpu',
+]
+

 class Accelerator:
+    """
+    Accelerator is an abstraction for the hardware device that is used to run the model.

-    def __init__(self, device: torch.device):
+    Args:
+        device (str): The device to be used. Currently only 'cpu' and 'cuda' are supported.
+    """
+
+    def __init__(self, device: str):
        self.device = device

-    def setup_model(self, model: nn.Module) -> nn.Module:
-        # TODO: implement this method
-        pass
+        assert self.device in _supported_devices, f"Device {self.device} is not supported yet, supported devices include {_supported_devices}"
+
+    def bind(self):
+        """
+        Set the default device for the current process.
+        """
+        if self.device == 'cpu':
+            pass
+        elif self.device == 'cuda':
+            # TODO(FrankLeeeee): use global environment to check if it is a dist job
+            # if is_distributed:
+            #     local_rank = EnvTable().get_local_rank()
+            #     torch.cuda.set_device(torch.device(f'cuda:{local_rank}'))
+            torch.cuda.set_device(torch.device('cuda'))
+            pass
+        else:
+            raise ValueError(f"Device {self.device} is not supported yet")
+
+    def configure_model(self, model: nn.Module) -> nn.Module:
+        """
+        Move the model to the device.
+
+        Args:
+            model (nn.Module): The model to be moved.
+        """
+        model = model.to(torch.device(self.device))
+        return model