Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-04-27 11:31:58 +00:00
[ci] update ci (#6254)
* fix for async io
* test for upgrading transformers
* add ci machine
* fix
* fix
* fix
* fix
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* Update test_fp16_torch.py
* Update build_on_pr.yml
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* fix
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* fix
* fix
* fix
* fix
* fix

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
parent 7ecdf9a211
commit 46ed5d856b
.github/workflows/build_on_pr.yml (vendored): 6 changed lines
@@ -87,10 +87,10 @@ jobs:
     name: Build and Test Colossal-AI
     needs: detect
     if: needs.detect.outputs.anyLibraryFileChanged == 'true'
-    runs-on: [self-hosted, gpu]
+    runs-on: ubuntu-latest
     container:
-      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
-      options: --gpus all --rm -v /dev/shm -v /data/scratch:/data/scratch
+      image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
+      options: --gpus all --shm-size=2g --rm -v /dev/shm -v /data/scratch:/data/scratch
     timeout-minutes: 90
     defaults:
       run:
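
The workflow change points the CI container at the image-cloud.luchentech.com mirror of the CUDA image, switches the runner label, and adds --shm-size=2g to the container options. A plausible reason for the larger shared-memory allowance (an assumption, not stated in the commit) is that PyTorch worker processes pass tensors through /dev/shm, so Docker's small default can crash multi-worker jobs. A minimal, illustrative sketch of that dependency, not taken from the repository's tests:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset


def main():
    # DataLoader worker processes hand batches back through shared memory
    # (/dev/shm). Inside a container with Docker's default 64 MB shm limit,
    # large batches can abort with "Bus error"; --shm-size=2g avoids that.
    dataset = TensorDataset(torch.randn(512, 3, 64, 64))
    loader = DataLoader(dataset, batch_size=128, num_workers=2)
    for (batch,) in loader:
        assert batch.shape[1:] == (3, 64, 64)


if __name__ == "__main__":
    main()
```
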
@@ -6,9 +6,10 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_to_all_single_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(4,), (1, 8, 16), (4, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("async_op", [True, False])
@@ -24,6 +25,7 @@ def check_all2all(shape, dtype, async_op):
     assert_close(output, output_fp8, rtol=0.1, atol=0.1)
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(8, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("async_op", [True, False])
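
The change repeated across the test files is the same: clear_cache_before_run is added to the colossalai.testing import and @clear_cache_before_run() is stacked on top of the existing @parameterize decorators, so every parameterized case starts from freshly cleared accelerator caches. A small sketch of how this decorator stack behaves; the toy check function below is hypothetical and not part of the diff:

```python
import torch

from colossalai.testing import clear_cache_before_run, parameterize


# Hypothetical toy check, not from the diff. Each @parameterize re-invokes the
# function once per listed value (stacked decorators give the full product),
# and @clear_cache_before_run() presumably frees cached accelerator memory
# before every invocation so cases do not interfere with one another.
@clear_cache_before_run()
@parameterize("shape", [(4,), (1, 8, 16), (4, 8, 16)])
@parameterize("dtype", [torch.bfloat16, torch.float16])
def check_roundtrip(shape, dtype):
    x = torch.rand(shape, dtype=dtype)
    assert x.shape == torch.Size(shape)


if __name__ == "__main__":
    check_roundtrip()  # runs all shape x dtype combinations
```

Calling the decorated function with no arguments runs the full cartesian product of the parameterized values, which is how the spawn-based test drivers invoke these checks.
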
@@ -6,9 +6,10 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import _all_to_all_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(16, 8, 4)])
 @parameterize("scatter_dim", [0, 1, 2])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
@@ -6,11 +6,12 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_to_all_single_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 dist.all_to_all_single
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(4), (8, 7), (4, 8, 16)])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
@@ -6,9 +6,10 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import _all_gather_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize(
     "shape",
     [(3, 7, 16)],
@@ -5,7 +5,7 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import all_reduce_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
 @parameterize(
@@ -20,6 +20,7 @@ from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
         (8,),
     ],
 )
+@clear_cache_before_run()
 @parameterize("dtype", [torch.float16, torch.bfloat16])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
 @parameterize("async_op", [True, False])
@@ -3,9 +3,10 @@ from torch.testing import assert_close
 
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import cast_from_fp8, cast_from_fp8_pipeline, cast_to_fp8, cast_to_fp8_pipeline
-from colossalai.testing import parameterize
+from colossalai.testing import clear_cache_before_run, parameterize
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(100, 10), (10, 100), (3, 7), (2, 1), (1, 2), (2, 2), (4, 2), (5,), (4,), (2,)])
 @parameterize("dtype", [torch.bfloat16, torch.float16, torch.float32])
 @parameterize("fp8_format", ["e4m3", "e5m2"])
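
This hunk touches the cast test, which round-trips tensors through cast_to_fp8 and cast_from_fp8 in both e4m3 and e5m2 formats. For orientation, here is a self-contained sketch of a scaled FP8 round-trip using PyTorch's native float8 dtypes (available in the PyTorch 2.2 image this CI uses); the scaling scheme is illustrative and is not the repository's implementation:

```python
import torch
from torch.testing import assert_close

# Illustrative scaled round-trip through torch's native FP8 dtype; the
# repository's cast_to_fp8/cast_from_fp8 helpers are not used here, and the
# single-scale scheme below is an assumption made for demonstration.
x = torch.randn(100, 10, dtype=torch.float16)
scale = x.abs().max().float() / 448.0  # 448 is the largest normal e4m3 value
x_fp8 = (x.float() / scale).to(torch.float8_e4m3fn)
x_back = x_fp8.float() * scale

# e4m3 keeps roughly 2-3 significant digits, so loose tolerances are expected.
assert_close(x.float(), x_back, rtol=0.1, atol=0.1)
```
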
@@ -8,7 +8,7 @@ from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
 from torch.testing import assert_close
 
 from colossalai import launch
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 # example modified from https://pytorch.org/tutorials/intermediate/ddp_tutorial.html
 
@@ -28,6 +28,7 @@ class ToyModel(nn.Module):
         return self.net2(self.relu(self.net1(x)))
 
 
+@clear_cache_before_run()
 @parameterize("mode", ["grad", "params"])
 def run_model(mode):
     rank = dist.get_rank()
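
The context lines come from a comm-hook test built on the ToyModel of the linked PyTorch DDP tutorial. For reference, the tutorial's model is reproduced below; only the forward line is confirmed by the diff context, so treat the layer sizes as illustrative:

```python
import torch.nn as nn


class ToyModel(nn.Module):
    """Two-layer MLP from the PyTorch DDP tutorial; only the forward
    expression below appears verbatim in this diff."""

    def __init__(self):
        super().__init__()
        self.net1 = nn.Linear(10, 10)
        self.relu = nn.ReLU()
        self.net2 = nn.Linear(10, 5)

    def forward(self, x):
        return self.net2(self.relu(self.net1(x)))
```
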
@@ -6,9 +6,10 @@ from torch.testing import assert_close
 from colossalai import launch
 from colossalai.accelerator import get_accelerator
 from colossalai.quantization.fp8 import reduce_scatter_fp8
-from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
+from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn
 
 
+@clear_cache_before_run()
 @parameterize("shape", [(16, 8, 4)])
 @parameterize("scatter_dim", [0, 1, 2])
 @parameterize("dtype", [torch.bfloat16, torch.float16])
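
All of these FP8 collective tests follow the same colossalai.testing driver pattern: a parameterized check function runs inside worker processes created by spawn, after launch initializes the distributed backend on each rank. The sketch below reconstructs that generic pattern rather than any file in this diff; the check body, the host/port keywords, and the exact launch signature are assumptions based on the library's usual test harness:

```python
import torch.distributed as dist

from colossalai import launch
from colossalai.testing import clear_cache_before_run, parameterize, rerun_if_address_is_in_use, spawn


@clear_cache_before_run()
@parameterize("shape", [(16, 8, 4)])
def check_collective(shape):
    # Hypothetical placeholder for an FP8-vs-full-precision comparison.
    assert dist.get_world_size() >= 1


def run_dist(rank, world_size, port):
    # Keyword names follow the library's common test pattern; treat the exact
    # launch signature as an assumption rather than a guarantee.
    launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
    check_collective()


@rerun_if_address_is_in_use()
def test_fp8_collective():
    spawn(run_dist, 2)


if __name__ == "__main__":
    test_fp8_collective()
```
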