mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-07-19 01:39:26 +00:00
[workflow] added regular 8 GPU testing (#1099)
* [workflow] added regular 8 GPU testing * polish workflow
This commit is contained in:
parent
7f2d2b2b5b
commit
03e52ecba3
37
.github/workflows/build_gpu_8.yml
vendored
Normal file
37
.github/workflows/build_gpu_8.yml
vendored
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
name: Build on 8 GPUs

on:
  schedule:
    # Runs at 00:00 UTC every day: all three date fields of the cron
    # expression are wildcards, so this is a daily (not weekly) schedule.
    - cron: '0 0 * * *'
  # Also allow the workflow to be started manually.
  workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
  build:
    name: Build and Test Colossal-AI
    # This workflow is triggered by `schedule` and `workflow_dispatch`, whose
    # event payloads contain no `pull_request` object. Gating on
    # `github.event.pull_request...` would therefore always evaluate to false
    # and skip the job, so check the repository the workflow runs in instead.
    if: github.repository == 'hpcaitech/ColossalAI'
    runs-on: [self-hosted, 8-gpu]
    container:
      image: hpcaitech/pytorch-cuda:1.10.1-11.3.0
      # Expose all GPUs and mount the dataset directory referenced by DATA.
      options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
    timeout-minutes: 40
    steps:
      - uses: actions/checkout@v2
        with:
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
      - name: Install Colossal-AI
        run: |
          # Restore cached build artifacts into the workspace when the cache
          # directory is non-empty.
          [ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
          pip install -r requirements/requirements.txt
          pip install -v -e .
          # Save the build directory and compiled shared objects back to the cache.
          cp -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
          cp /__w/ColossalAI/ColossalAI/*.so /github/home/cuda_ext_cache/
          pip install -r requirements/requirements-test.txt
      - name: Unit Testing
        run: |
          # Memory currently in use on GPU 0, as a bare number (no header/units).
          gpu_used=$(nvidia-smi -i 0 --query-gpu=memory.used --format=csv,noheader,nounits)
          # Run the tests only when GPU 0 is (nearly) idle; if it is busy the
          # check fails and, being the last command, fails the step.
          [ "$gpu_used" -le "100" ] && PYTHONPATH=$PWD pytest tests
        env:
          DATA: /data/scratch/cifar-10
|
||||||
|
|
14
.github/workflows/release_nightly.yml
vendored
14
.github/workflows/release_nightly.yml
vendored
@ -5,16 +5,7 @@ on:
|
|||||||
# run at 00:00 of every Saturday (cron day-of-week 6)
|
# run at 00:00 of every Saturday (cron day-of-week 6)
|
||||||
- cron: '0 0 * * 6'
|
- cron: '0 0 * * 6'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
|
||||||
cuda_version:
|
|
||||||
type: choice
|
|
||||||
description: CUDA Version
|
|
||||||
default: "all"
|
|
||||||
required: true
|
|
||||||
options:
|
|
||||||
- "all"
|
|
||||||
- "11.3"
|
|
||||||
- "10.2"
|
|
||||||
jobs:
|
jobs:
|
||||||
matrix_preparation:
|
matrix_preparation:
|
||||||
name: Prepare Container List
|
name: Prepare Container List
|
||||||
@ -24,8 +15,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- id: set-matrix
|
- id: set-matrix
|
||||||
run: |
|
run: |
|
||||||
[ "${{github.event.inputs.cuda_version}}" != "all" ] && matrix="[\"hpcaitech/cuda-conda:${{github.event.inputs.cuda_version}}\"]"
|
matrix="[\"hpcaitech/cuda-conda:11.3\", \"hpcaitech/cuda-conda:10.2\"]"
|
||||||
[ "${{github.event.inputs.cuda_version}}" == "all" || "${{github.event.inputs.cuda_version}}" == "" ] && matrix="[\"hpcaitech/cuda-conda:11.3\", \"hpcaitech/cuda-conda:10.2\"]"
|
|
||||||
echo $matrix
|
echo $matrix
|
||||||
echo "::set-output name=matrix::{\"container\":$(echo $matrix)}"
|
echo "::set-output name=matrix::{\"container\":$(echo $matrix)}"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user