[pipeline] Llama pipeline (#4205)

* bloom policy * llama pipeline forward and tests * fix the output and attention_mask * fix name * bind argument to policy * Revert "bloom policy" This reverts commit 8dee68a0a2. This policy should be reverted and copied to feature/bloom * revert the bloom changes * cancel unneeded inputs * gpt
2025-09-12 20:54:35 +00:00 · 2023-07-11 11:37:26 +08:00
parent 1094e0f0d3
commit 1622031058
6 changed files with 516 additions and 4 deletions
--- a/tests/kit/model_zoo/transformers/gpt.py
+++ b/tests/kit/model_zoo/transformers/gpt.py
@@ -52,7 +52,7 @@ loss_fn_for_gpt2_model = lambda x: x.last_hidden_state.mean()
 loss_fn = lambda x: x.loss

 config = transformers.GPT2Config(n_layer=2,
-                                 n_head=4,
+                                 n_head=2,
                                 vocab_size=50258,
                                 attn_pdrop=0,
                                 embd_pdrop=0,
--- a/tests/test_shardformer/test_model/_utils.py
+++ b/tests/test_shardformer/test_model/_utils.py
@@ -39,6 +39,7 @@ def build_pipeline_model(model_fn,
    shard_config = ShardConfig(enable_fused_normalization=enable_fused_normalization,
                               enable_tensor_parallelism=enable_tensor_parallelism,
                               pipeline_stage_manager=stage_manager)
+
    shard_former = ShardFormer(shard_config=shard_config)
    sharded_model, shared_params = shard_former.optimize(model_copy)
    return org_model.cuda(), sharded_model.cuda()
--- a/tests/test_shardformer/test_model/test_shard_llama_pipeline.py
+++ b/tests/test_shardformer/test_model/test_shard_llama_pipeline.py
@@ -0,0 +1,85 @@
+import pytest
+import torch
+
+import colossalai
+from colossalai.cluster import ProcessGroupMesh
+from colossalai.logging import disable_existing_loggers
+from colossalai.pipeline.stage_manager import PipelineStageManager
+from colossalai.shardformer.policies.base_policy import Policy
+from colossalai.tensor.d_tensor.api import is_customized_distributed_tensor, is_distributed_tensor
+from colossalai.testing import (
+    assert_hf_output_close,
+    clear_cache_before_run,
+    parameterize,
+    rerun_if_address_is_in_use,
+    spawn,
+)
+from tests.kit.model_zoo import model_zoo
+from tests.test_shardformer.test_model._utils import build_model, build_pipeline_model, run_forward
+
+
+def check_forward_backward(org_model, sharded_model, data_gen_fn, output_transform_fn, loss_fn):
+    # Placeholder (not implemented yet): judging by its name it should compare forward/backward results.
+    pass
+
+
+@parameterize('enable_fused_normalization', [False])
+@parameterize('enable_tensor_parallelism', [False])
+@parameterize('use_lazy_init', [False])
+def run_llama_test(enable_fused_normalization, enable_tensor_parallelism, use_lazy_init):
+    """Smoke-test the forward pass of the pipeline-sharded llama model on this rank's stage.
+
+    Stage 0 is fed token ids and must return a mapping with a ``hidden_states`` entry; every
+    other stage is fed hidden states directly. Only the output shapes are checked.
+
+    Args:
+        enable_fused_normalization: forwarded to ``build_pipeline_model``.
+        enable_tensor_parallelism: forwarded to ``build_pipeline_model``.
+        use_lazy_init: forwarded to ``build_pipeline_model``.
+    """
+    # TODO: merge this into test_shard_llama
+    # 2 x 2 mesh over 4 ranks: axis 0 is data parallel, axis 1 is pipeline parallel.
+    PP_DIM = 1
+    DP_SIZE, PP_SIZE = 2, 2
+    BATCH_SIZE, SEQ_LEN, HIDDEN_SIZE = 2, 3, 128
+    expected_shape = (BATCH_SIZE, SEQ_LEN, HIDDEN_SIZE)
+
+    pg_mesh = ProcessGroupMesh(DP_SIZE, PP_SIZE)
+    stage_manager = PipelineStageManager(pg_mesh, PP_DIM)
+
+    sub_model_zoo = model_zoo.get_sub_registry('transformers_llama')
+    x = torch.randint(0, 1000, (BATCH_SIZE, SEQ_LEN)).cuda()
+    hidden_states = torch.randint(0, 1000, expected_shape).to(torch.float32).cuda()
+    for name, (model_fn, data_gen_fn, output_transform_fn, loss_fn, _) in sub_model_zoo.items():
+        if name != 'transformers_llama':
+            continue
+        org_model, sharded_model = build_pipeline_model(model_fn, stage_manager, enable_fused_normalization,
+                                                        enable_tensor_parallelism, use_lazy_init)
+        if stage_manager.stage == 0:
+            # ones_like already allocates on x's device, so no extra .cuda() is needed.
+            output = sharded_model(input_ids=x, attention_mask=torch.ones_like(x))
+            assert output['hidden_states'].shape == expected_shape
+        else:
+            # Any later stage is driven with stand-in hidden states instead of token ids.
+            attention_mask = torch.ones((BATCH_SIZE, SEQ_LEN)).cuda()
+            output = sharded_model(hidden_states=hidden_states, attention_mask=attention_mask)
+            assert output[0].shape == expected_shape
+
+    torch.cuda.empty_cache()
+
+
+def check_llama(rank, world_size, port):    # worker entry point: test_llama passes this function to spawn()
+    disable_existing_loggers()
+    colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
+    run_llama_test()    # called without arguments; presumably @parameterize fills them in -- confirm
+
+
+@pytest.mark.dist
+@rerun_if_address_is_in_use()
+@clear_cache_before_run()
+def test_llama():    # pytest entry point for the pipeline-parallel llama forward test
+    spawn(check_llama, 4)    # 4 workers (presumably nprocs), matching the 2 x 2 mesh in run_llama_test
+
+
+if __name__ == "__main__":    # allow running the test directly, without pytest
+    test_llama()