diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml
index 89b7f1f3b..50d488f18 100644
--- a/.github/workflows/build_on_pr.yml
+++ b/.github/workflows/build_on_pr.yml
@@ -87,10 +87,10 @@ jobs:
     name: Build and Test Colossal-AI
     needs: detect
     if: needs.detect.outputs.anyLibraryFileChanged == 'true'
-    runs-on: [self-hosted, gpu]
+    runs-on: ubuntu-latest
     container:
-      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
-      options: --gpus all --rm -v /dev/shm -v /data/scratch:/data/scratch
+      image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
+      options: --gpus all --shm-size=2g --rm -v /dev/shm -v /data/scratch:/data/scratch
     timeout-minutes: 90
     defaults:
       run:
diff --git a/tests/test_fp8/test_all_to_all_single.py b/tests/test_fp8/test_all_to_all_single.py
index 722cbce9a..448a3f031 100644
--- a/tests/test_fp8/test_all_to_all_single.py
+++ b/tests/test_fp8/test_all_to_all_single.py
@@ -6,9 +6,10 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_to_all_single_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(4,), (1, 8, 16), (4, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("async_op", [True, False])
@@ -24,6 +25,7 @@ def check_all2all(shape, dtype, async_op):
     assert_close(output, output_fp8, rtol=0.1, atol=0.1)
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(8, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("async_op", [True, False])
diff --git a/tests/test_fp8/test_fp8_all_to_all.py b/tests/test_fp8/test_fp8_all_to_all.py
index 98bbbad85..a86741b4c 100644
--- a/tests/test_fp8/test_fp8_all_to_all.py
+++ b/tests/test_fp8/test_fp8_all_to_all.py
@@ -6,9 +6,10 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import _all_to_all_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(16, 8, 4)])
 @parameterize("scatter_dim", [0, 1, 2])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
diff --git a/tests/test_fp8/test_fp8_all_to_all_single.py b/tests/test_fp8/test_fp8_all_to_all_single.py
index 70765f2d4..a301301b3 100644
--- a/tests/test_fp8/test_fp8_all_to_all_single.py
+++ b/tests/test_fp8/test_fp8_all_to_all_single.py
@@ -6,11 +6,12 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_to_all_single_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 dist.all_to_all_single
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(4), (8, 7), (4, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
diff --git a/tests/test_fp8/test_fp8_allgather.py b/tests/test_fp8/test_fp8_allgather.py
index 91e66e83c..79b55395d 100644
--- a/tests/test_fp8/test_fp8_allgather.py
+++ b/tests/test_fp8/test_fp8_allgather.py
@@ -6,9 +6,10 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import _all_gather_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize(
     "shape",
     [(3, 7, 16)],
diff --git a/tests/test_fp8/test_fp8_allreduce.py b/tests/test_fp8/test_fp8_allreduce.py
index ccc43ed29..297b05e48 100644
--- a/tests/test_fp8/test_fp8_allreduce.py
+++ b/tests/test_fp8/test_fp8_allreduce.py
@@ -5,7 +5,7 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_reduce_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
 @parameterize(
@@ -20,6 +20,7 @@ from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
         (8,),
     ],
 )
+@clear_cache_before_run()
 @parameterize("dtype", [torch.float16, torch.bfloat16])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
 @parameterize("async_op", [True, False])
diff --git a/tests/test_fp8/test_fp8_cast.py b/tests/test_fp8/test_fp8_cast.py
index db9a909e6..479cb3770 100644
--- a/tests/test_fp8/test_fp8_cast.py
+++ b/tests/test_fp8/test_fp8_cast.py
@@ -3,9 +3,10 @@ from torch.testing import assert_close
 
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import cast_from_fp8, cast_from_fp8_pipeline, cast_to_fp8, cast_to_fp8_pipeline
-from colossalai.testing import parameterize
+from colossalai.testing import clear_cache_before_run, parameterize
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(100, 10), (10, 100), (3, 7), (2, 1), (1, 2), (2, 2), (4, 2), (5,), (4,), (2,)])
 @parameterize("dtype", [torch.bfloat16, torch.float16, torch.float32])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
diff --git a/tests/test_fp8/test_fp8_fsdp_comm_hook.py b/tests/test_fp8/test_fp8_fsdp_comm_hook.py
index 3d0660961..a95fbdf01 100644
--- a/tests/test_fp8/test_fp8_fsdp_comm_hook.py
+++ b/tests/test_fp8/test_fp8_fsdp_comm_hook.py
@@ -8,7 +8,7 @@ from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
 from torch.testing import assert_close
 
 from colossalai import launch
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 # example modified from https://pytorch.org/tutorials/intermediate/ddp_tutorial.html
 
@@ -28,6 +28,7 @@ class ToyModel(nn.Module):
         return self.net2(self.relu(self.net1(x)))
 
 
+@clear_cache_before_run()
 @parameterize("mode", ["grad", "params"])
 def run_model(mode):
     rank = dist.get_rank()
diff --git a/tests/test_fp8/test_fp8_reduce_scatter.py b/tests/test_fp8/test_fp8_reduce_scatter.py
index e0b558a25..a2eac1c7e 100644
--- a/tests/test_fp8/test_fp8_reduce_scatter.py
+++ b/tests/test_fp8/test_fp8_reduce_scatter.py
@@ -6,9 +6,10 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import reduce_scatter_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(16, 8, 4)])
 @parameterize("scatter_dim", [0, 1, 2])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
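
Note on the new decorator: clear_cache_before_run is imported from colossalai.testing and stacked above the existing parameterize decorators so each FP8 test case starts with freed memory on the CI runner. Its real implementation is not part of this diff; the sketch below only illustrates what such a decorator might plausibly do (collect garbage and empty the CUDA allocator cache before the wrapped test runs), and its behavior is an assumption rather than ColossalAI's actual code.

# Hypothetical sketch of a cache-clearing test decorator; the real
# colossalai.testing.clear_cache_before_run may behave differently.
import functools
import gc

import torch


def clear_cache_before_run():
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            gc.collect()  # drop Python-level garbage left over from earlier test cases
            if torch.cuda.is_available():
                torch.cuda.empty_cache()  # return cached CUDA blocks to the driver
            return func(*args, **kwargs)

        return wrapper

    return decorator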