[shardformer/sequence parallel] support gpt2 seq parallel with pp/dp/tp (#4460)

* support gpt2 seq parallel with pp/dp/tp

* fix a bug when waiting for a stream to finish (see the sketch below)

* delete unused gpt2_seq file
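
The stream-synchronization fix itself is not visible in the hunk below. As a hedged illustration of the general pattern such a fix targets (the wrapper name and structure here are hypothetical, not the actual patch): work launched on a side CUDA stream must be waited on before the default stream consumes its output.

```python
import torch
import torch.distributed as dist

# Hypothetical sketch of the stream-wait pattern; not the actual patch.
comm_stream = torch.cuda.Stream()

def all_gather_overlapped(tensor, group=None):
    world_size = dist.get_world_size(group)
    output = [torch.empty_like(tensor) for _ in range(world_size)]
    # Launch the collective on a side stream so it can overlap with compute.
    with torch.cuda.stream(comm_stream):
        dist.all_gather(output, tensor, group=group)
    # Without this wait, kernels on the default stream may read `output`
    # before the all_gather on `comm_stream` has finished -- the class of
    # race that a "wait for stream done" fix addresses.
    torch.cuda.current_stream().wait_stream(comm_stream)
    return torch.cat(output, dim=0)
```
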
Author: Bin Jia
Date:   2023-08-18 11:21:53 +08:00 (committed by GitHub)
parent a78daf6180
commit 7c8be77081
6 changed files with 268 additions and 240 deletions

@@ -105,10 +105,18 @@ def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, loss_fn,
     'enable_all_optimization': True,
     'use_lazy_init': False,
     'precision': 'fp32',
 }, {
+    'tp_size': 2,
+    'pp_size': 2,
+    'num_microbatches': 4,
+    'enable_all_optimization': True,
+    'use_lazy_init': True,
+    'enable_sequence_parallelism': True,
+    'precision': 'fp32',
+}, {
     'tp_size': 4,
     'pp_size': 1,
-    'enable_all_optimization': False,
+    'enable_all_optimization': True,
     'use_lazy_init': True,
     'enable_sequence_parallelism': True,
     'precision': 'fp32',
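
Each dict above parameterizes one test run. For orientation, a minimal sketch of how such a configuration maps onto ColossalAI's HybridParallelPlugin, assuming the usual launch/boost scaffolding (model, optimizer, and dataloader construction is elided):

```python
import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import HybridParallelPlugin

colossalai.launch_from_torch(config={})

# Mirrors the new test entry: tp=2, pp=2, sequence parallelism enabled.
plugin = HybridParallelPlugin(
    tp_size=2,
    pp_size=2,
    num_microbatches=4,
    enable_all_optimization=True,
    enable_sequence_parallelism=True,
    precision='fp32',
)
booster = Booster(plugin=plugin)
# model, optimizer, dataloader assumed to be built elsewhere:
# model, optimizer, _, dataloader, _ = booster.boost(
#     model, optimizer, dataloader=dataloader)
```
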