[ci] fix shardformer tests. (#5255)

* fix ci

fix

* revert: revert p2p

* feat: add enable_metadata_cache option

* revert: enable t5 tests

---------

Co-authored-by: Wenhao Chen <cwher@outlook.com>
This commit is contained in:
flybird11111
2024-01-11 19:07:45 +08:00
committed by GitHub
parent 756c400ad2
commit e830ef917d
4 changed files with 20 additions and 3 deletions

View File

@@ -86,6 +86,7 @@ def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, loss_fn,
"tp_size": 2,
"pp_size": 2,
"num_microbatches": 2,
"enable_metadata_cache": False,
"enable_all_optimization": True,
"use_lazy_init": True,
"precision": "fp16",
@@ -95,6 +96,7 @@ def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, loss_fn,
"tp_size": 1,
"pp_size": 2,
"num_microbatches": 4,
"enable_metadata_cache": False,
"use_lazy_init": False,
"precision": "fp16",
"initial_scale": 1,
@@ -110,6 +112,7 @@ def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, loss_fn,
"tp_size": 1,
"pp_size": 4,
"num_microbatches": 4,
"enable_metadata_cache": False,
"enable_all_optimization": False,
"use_lazy_init": False,
"precision": "fp32",
@@ -128,6 +131,7 @@ def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, loss_fn,
"tp_size": 1,
"pp_size": 2,
"num_microbatches": 2,
"enable_metadata_cache": False,
"enable_all_optimization": True,
"use_lazy_init": True,
"zero_stage": 1,
@@ -159,6 +163,7 @@ def run_t5_test(test_config):
"tp_size": 2,
"pp_size": 2,
"num_microbatches": 4,
"enable_metadata_cache": False,
"enable_all_optimization": False,
"use_lazy_init": False,
"precision": "fp32",
@@ -168,6 +173,7 @@ def run_t5_test(test_config):
"tp_size": 2,
"pp_size": 2,
"num_microbatches": 4,
"enable_metadata_cache": False,
"enable_all_optimization": False,
"use_lazy_init": False,
"precision": "fp16",