diff --git a/tests/test_device/test_init_logical_pg.py b/tests/test_device/test_init_logical_pg.py
index d93f65698..20d69b2a7 100644
--- a/tests/test_device/test_init_logical_pg.py
+++ b/tests/test_device/test_init_logical_pg.py
@@ -26,7 +26,7 @@ def check_layer(rank, world_size, port):
         dist.all_reduce(tensor, op=ReduceOp.SUM, group=pg)
         assert tensor.equal(tensor_to_check)
 
-
+@pytest.mark.skip("tested in corresponding sharderformer")
 @pytest.mark.dist
 @rerun_if_address_is_in_use()
 def test_logical_pg():
diff --git a/tests/test_fp8/test_fp8_allgather.py b/tests/test_fp8/test_fp8_allgather.py
index 432d24abf..91e66e83c 100644
--- a/tests/test_fp8/test_fp8_allgather.py
+++ b/tests/test_fp8/test_fp8_allgather.py
@@ -1,4 +1,3 @@
-import pytest
 import torch
 import torch.distributed as dist
 from torch.distributed.distributed_c10d import _get_default_group
@@ -37,7 +36,6 @@ def run_dist(rank, world_size, port):
     check_4gpu()
 
 
-@pytest.mark.skip("tested in corresponding sharderformer")
 @rerun_if_address_is_in_use()
 def test_all_gather():
     spawn(run_dist, 4)