[Colossal-Inference] (v0.1.0) Merge pull request #5739 from hpcaitech/feature/colossal-infer

[Inference] Merge feature/colossal-infer
Authored by Yuanheng Zhao on 2024-05-22 14:31:09 +08:00, committed by GitHub.
226 changed files with 20489 additions and 9729 deletions

@@ -249,7 +249,6 @@ class VocabParallelEmbedding1D(PaddingParallelModule):
     The ``args`` and ``kwargs`` used in :class:``torch.nn.functional.embedding`` should contain:
     ::

         max_norm (float, optional): If given, each embedding vector with norm larger than max_norm is
                                     renormalized to have norm max_norm. Note: this will modify weight in-place.
         norm_type (float, optional): The p of the p-norm to compute for the max_norm option. Default 2.
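The max_norm / norm_type behaviour described in this docstring is plain torch.nn.functional.embedding semantics; the short sketch below is not part of the PR and only demonstrates the in-place renormalization the docstring warns about.

import torch
import torch.nn.functional as F

# Standalone illustration: looked-up rows whose p-norm exceeds max_norm are
# renormalized in place in the weight tensor itself.
weight = torch.randn(10, 4) * 5      # embedding table with deliberately large rows
idx = torch.tensor([0, 3])           # rows that will be looked up

norms_before = weight[idx].norm(p=2, dim=1).clone()
_ = F.embedding(idx, weight, max_norm=1.0, norm_type=2.0)
norms_after = weight[idx].norm(p=2, dim=1)

print(norms_before)  # typically well above 1.0
print(norms_after)   # <= 1.0, because `weight` was modified in place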

@@ -125,9 +125,3 @@ class ShardConfig:
         # It may also slow down training when seq len is small. Plz enable manually.
         # self.enable_sequence_parallelism = True
         # self.enable_sequence_overlap = True
-
-    def _infer(self):
-        """
-        Set default params for inference.
-        """
-        # assert self.pipeline_stage_manager is None, "pipeline parallelism is not supported in inference for now"
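With the private _infer() helper removed, inference-oriented defaults are expected to be set by the caller when the config is built. The sketch below is illustrative only: pipeline_stage_manager comes from the removed assert, while enable_tensor_parallelism is an assumed ShardConfig field not shown in this hunk.

from colossalai.shardformer import ShardConfig

# Illustrative sketch: set inference-oriented values explicitly instead of
# relying on the removed _infer() helper. Field names other than
# pipeline_stage_manager are assumptions.
inference_config = ShardConfig(
    pipeline_stage_manager=None,      # the removed assert forbade pipeline parallelism
    enable_tensor_parallelism=False,  # assumed field: single-device inference, no TP
)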

@@ -1,6 +1,7 @@
 import os
 from typing import Dict, List, Tuple

+import torch.distributed as dist
 import torch.nn as nn
 from torch import Tensor
@@ -36,7 +37,11 @@ class ShardFormer:
     """

     def __init__(self, shard_config: ShardConfig):
-        self.coordinator = DistCoordinator()
+        self.is_distributed = dist.is_initialized()
+        if self.is_distributed:
+            self.coordinator = DistCoordinator()
+        else:
+            self.coordinator = None
         self.shard_config = shard_config

     def optimize(self, model: nn.Module, policy: Policy = None) -> Tuple[nn.Module, List[Dict[int, Tensor]]]:
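The conditional above lets ShardFormer be constructed without an initialized process group: DistCoordinator (which needs an initialized default group) is only built when torch.distributed.is_initialized() returns True, otherwise self.coordinator stays None. A minimal single-process sketch under that assumption follows; the ShardConfig field enable_tensor_parallelism and the commented-out model/policy names are assumptions, not part of this diff.

import torch.distributed as dist

from colossalai.shardformer import ShardConfig, ShardFormer

# Single-process sketch: dist.init_process_group() has NOT been called, so the
# new __init__ path skips DistCoordinator entirely.
assert not dist.is_initialized()

shard_config = ShardConfig(enable_tensor_parallelism=False)  # assumed field
shard_former = ShardFormer(shard_config=shard_config)
assert shard_former.coordinator is None

# optimize() keeps its signature and still returns (module, shared_params);
# "MyModel" / "MyModelPolicy" below are placeholders, not real classes.
# sharded_model, shared_params = shard_former.optimize(MyModel(), policy=MyModelPolicy())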