[legacy] clean up legacy code (#4743)

* [legacy] remove outdated codes of pipeline (#4692)
* [legacy] remove cli of benchmark and update optim (#4690)
* [legacy] remove cli of benchmark and update optim
* [doc] fix cli doc test
* [legacy] fix engine clip grad norm
* [legacy] remove outdated colo tensor (#4694)
* [legacy] remove outdated colo tensor
* [test] fix test import
* [legacy] move outdated zero to legacy (#4696)
* [legacy] clean up utils (#4700)
* [legacy] clean up utils
* [example] update examples
* [legacy] clean up amp
* [legacy] fix amp module
* [legacy] clean up gpc (#4742)
* [legacy] clean up context
* [legacy] clean core, constants and global vars
* [legacy] refactor initialize
* [example] fix examples ci
* [example] fix examples ci
* [legacy] fix tests
* [example] fix gpt example
* [example] fix examples ci
* [devops] fix ci installation
* [example] fix examples ci
2025-11-28 03:46:58 +00:00 · 2023-09-18 16:31:06 +08:00
parent 32e7f99416
commit b5f9e37c70
342 changed files with 2919 additions and 4182 deletions
--- a/examples/tutorial/sequence_parallel/model/layers/head.py
+++ b/examples/tutorial/sequence_parallel/model/layers/head.py
@@ -1,15 +1,17 @@
-import colossalai
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from .pooler import Pooler
-from .linear import Linear
-from .embedding import VocabEmbedding
-from colossalai.core import global_context as gpc
-from colossalai.context import ParallelMode
-from colossalai.kernel import LayerNorm
 from loss_func.cross_entropy import vocab_cross_entropy

+import colossalai
+from colossalai.kernel import LayerNorm
+from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.core import global_context as gpc
+
+from .embedding import VocabEmbedding
+from .linear import Linear
+from .pooler import Pooler
+

 class BertLMHead(nn.Module):
    """Masked LM head for Bert
@@ -19,10 +21,11 @@ class BertLMHead(nn.Module):
        layernorm_epsilon: tolerance for layer norm divisions
    """

-    def __init__(self,
-                 vocab_size,
-                 hidden_size,
-                 ):
+    def __init__(
+        self,
+        vocab_size,
+        hidden_size,
+    ):

        super(BertLMHead, self).__init__()
        self.bias = torch.nn.Parameter(torch.zeros(vocab_size))
--- a/examples/tutorial/sequence_parallel/model/layers/preprocess.py
+++ b/examples/tutorial/sequence_parallel/model/layers/preprocess.py
@@ -1,7 +1,8 @@
-from colossalai.context.parallel_mode import ParallelMode
 import torch
 import torch.nn as nn
-from colossalai.core import global_context as gpc
+
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc


 class PreProcessor(nn.Module):
@@ -14,8 +15,8 @@ class PreProcessor(nn.Module):
        # Create position ids
        seq_length = token_ids.size(1)
        local_rank = gpc.get_local_rank(ParallelMode.SEQUENCE)
-        position_ids = torch.arange(seq_length*local_rank,
-                                    seq_length * (local_rank+1),
+        position_ids = torch.arange(seq_length * local_rank,
+                                    seq_length * (local_rank + 1),
                                    dtype=torch.long,
                                    device=token_ids.device)
        position_ids = position_ids.unsqueeze(0).expand_as(token_ids)