# Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-04-28 03:43:01 +00:00).
# Latest commit: [test] remove cpu marker; [test] remove gpu marker; [test] update pytest markers; [ci] update unit test ci.
# File listing metadata: 312 lines, 11 KiB, YAML.
name: Build on PR

# Triggers:
#   - pull_request: run (or prepare/clean caches for) PRs that target the listed
#     branches and touch build- or test-relevant paths.
#   - create / delete: maintain the per-branch testmon cache when a ref is
#     created or removed.
on:
  pull_request:
    types:
      - synchronize
      - opened
      - reopened
      - ready_for_review
      - closed
      - edited
    branches:
      - 'main'
      - 'develop'
      - 'feature/**'
    paths:
      # run command & env variables change
      - '.github/workflows/build_on_pr.yml'
      # source code change (ignore doc change)
      - 'colossalai/**'
      - '!colossalai/**.md'
      # cuda extension change (ignore doc change)
      - 'op_builder/**'
      - '!op_builder/**.md'
      # requirements change
      - 'requirements/**'
      # test change (ignore doc change)
      - 'tests/**'
      - '!tests/**.md'
      # test config change
      - 'pytest.ini'
      # install command change
      - 'setup.py'
  create:
  delete:

jobs:
  # When a branch is created in the main repository, seed its testmon cache
  # with a copy of the default branch's cache directory.
  prepare_cache:
    name: Prepare testmon cache
    if: |
      github.event_name == 'create' &&
      github.event.ref_type == 'branch' &&
      github.event.repository.full_name == 'hpcaitech/ColossalAI'
    runs-on: [self-hosted, gpu]
    container:
      image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
      options: --rm
    timeout-minutes: 5
    defaults:
      run:
        shell: bash
    steps:
      - name: Copy testmon cache
        env:
          MAIN_BRANCH: ${{ github.event.master_branch }}
          # Passed via env instead of being interpolated into the script so
          # the ref name is never parsed as shell code.
          REF_NAME: ${{ github.event.ref }}
        run: |
          # A branch name may contain slashes; replace every one of them with
          # a space so the cache lives in a single flat directory.
          REF_BRANCH="${REF_NAME//\// }"
          if [ -d "/github/home/testmon_cache/${MAIN_BRANCH}" ]; then
            cp -p -r "/github/home/testmon_cache/${MAIN_BRANCH}" "/github/home/testmon_cache/${REF_BRANCH}"
          fi

  # When a PR is opened, reopened, or retargeted to another base, copy the base
  # branch's .testmondata files into the PR-specific cache directory.
  prepare_cache_for_pr:
    name: Prepare testmon cache for PR
    if: |
      github.event_name == 'pull_request' &&
      (github.event.action == 'opened' || github.event.action == 'reopened' || (github.event.action == 'edited' && github.event.changes.base != null)) &&
      github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
    runs-on: [self-hosted, gpu]
    container:
      image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
      options: --rm
    timeout-minutes: 5
    defaults:
      run:
        shell: bash
    concurrency:
      group: ${{ github.head_ref }}
      cancel-in-progress: false
    steps:
      - name: Copy testmon cache
        env:
          PR_NUMBER: ${{ github.event.number }}
          BASE_REF: ${{ github.event.pull_request.base.ref }}
        run: |
          # A branch name may contain slashes; replace every one of them with
          # a space.
          BASE="${BASE_REF//\// }"
          if [ -d "/github/home/testmon_cache/${BASE}" ] && [ -n "$(ls -A "/github/home/testmon_cache/${BASE}")" ]; then
            mkdir -p "/github/home/testmon_cache/_pull/${PR_NUMBER}" && cp -p -r "/github/home/testmon_cache/${BASE}"/.testmondata* "/github/home/testmon_cache/_pull/${PR_NUMBER}"
          fi

  # Work out which extension-related and library-related files the PR changed;
  # the build job is gated on these outputs.
  detect:
    name: Detect file change
    if: |
      github.event_name == 'pull_request' &&
      (github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'ready_for_review') &&
      github.event.pull_request.draft == false &&
      github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
    outputs:
      changedExtensionFiles: ${{ steps.find-extension-change.outputs.all_changed_files }}
      anyExtensionFileChanged: ${{ steps.find-extension-change.outputs.any_changed }}
      changedLibraryFiles: ${{ steps.find-lib-change.outputs.all_changed_files }}
      anyLibraryFileChanged: ${{ steps.find-lib-change.outputs.any_changed }}
    runs-on: ubuntu-latest
    concurrency:
      group: ${{ github.head_ref }}
      cancel-in-progress: false
    steps:
      # NOTE(review): actions/checkout@v2 is an old major version; consider
      # upgrading after confirming the runners support it.
      - uses: actions/checkout@v2
        with:
          # Full history is needed for `git merge-base` below.
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Locate base commit
        id: locate-base-sha
        run: |
          # NOTE(review): the merge base is always computed against
          # origin/main, even for PRs that target develop or feature/** --
          # confirm this is intended.
          curBranch=$(git rev-parse --abbrev-ref HEAD)
          commonCommit=$(git merge-base origin/main "$curBranch")
          echo "$commonCommit"
          echo "baseSHA=$commonCommit" >> "$GITHUB_OUTPUT"

      # NOTE(review): third-party actions referenced by a mutable tag can
      # change underneath the workflow; consider pinning to a full commit SHA.
      - name: Find the changed extension-related files
        id: find-extension-change
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
          files: |
            op_builder/**
            colossalai/kernel/**
            setup.py

      - name: Find the changed library-related files
        id: find-lib-change
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
          files: |
            **/*.py
            **/*.h
            **/*.cpp
            **/*.cu
            **/*.txt

      - name: List changed files
        env:
          # File names come from the PR and are untrusted: hand them to the
          # script as data through env, never by inline interpolation.
          EXTENSION_FILES: ${{ steps.find-extension-change.outputs.all_changed_files }}
          LIBRARY_FILES: ${{ steps.find-lib-change.outputs.all_changed_files }}
        run: |
          # The lists are whitespace-separated, so word splitting is intended.
          for file in $EXTENSION_FILES; do
            echo "$file was changed"
          done
          for file in $LIBRARY_FILES; do
            echo "$file was changed"
          done

  # Build TensorNVMe and Colossal-AI (reusing cached build directories where
  # possible), run the unit tests through testmon, and upload a small report
  # artifact with the PR number and coverage figure.
  build:
    name: Build and Test Colossal-AI
    needs: detect
    if: needs.detect.outputs.anyLibraryFileChanged == 'true'
    runs-on: [self-hosted, gpu]
    container:
      image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
      options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
    timeout-minutes: 60
    defaults:
      run:
        shell: bash
    concurrency:
      group: ${{ github.head_ref }}
      cancel-in-progress: false
    steps:
      - name: Checkout TensorNVMe
        uses: actions/checkout@v2
        with:
          repository: hpcaitech/TensorNVMe
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
          path: TensorNVMe

      - name: Restore TensorNVMe Cache
        run: |
          if [ -d /github/home/tensornvme_cache ] && [ -n "$(ls -A /github/home/tensornvme_cache/)" ]; then
            cp -p -r /github/home/tensornvme_cache/* /__w/ColossalAI/ColossalAI/TensorNVMe
          fi

      - name: Install TensorNVMe
        run: |
          cd TensorNVMe
          conda install cmake
          pip install -r requirements.txt
          pip install -v .

      - name: Store TensorNVMe Cache
        run: |
          cd TensorNVMe
          # The cache directory does not exist on a fresh runner.
          mkdir -p /github/home/tensornvme_cache
          cp -p -r ./build /github/home/tensornvme_cache/

      - name: Checkout Colossal-AI
        uses: actions/checkout@v2
        with:
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}

      - name: Restore Colossal-AI Cache
        # Skip the cached build when extension sources changed so the CUDA
        # extensions are rebuilt.
        if: needs.detect.outputs.anyExtensionFileChanged != 'true'
        run: |
          # -p flag is required to preserve the file timestamp to avoid ninja rebuild
          if [ -d /github/home/cuda_ext_cache ] && [ -n "$(ls -A /github/home/cuda_ext_cache/)" ]; then
            cp -p -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
          fi

      - name: Install Colossal-AI
        run: |
          CUDA_EXT=1 pip install -v -e .
          pip install -r requirements/requirements-test.txt

      - name: Store Colossal-AI Cache
        run: |
          # The cache directory does not exist on a fresh runner.
          mkdir -p /github/home/cuda_ext_cache
          # -p flag is required to preserve the file timestamp to avoid ninja rebuild
          cp -p -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/

      - name: Restore Testmon Cache
        env:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          if [ -d "/github/home/testmon_cache/_pull/${PR_NUMBER}" ] && [ -n "$(ls -A "/github/home/testmon_cache/_pull/${PR_NUMBER}")" ]; then
            cp -p -r "/github/home/testmon_cache/_pull/${PR_NUMBER}"/.testmondata* /__w/ColossalAI/ColossalAI/
          fi

      - name: Execute Unit Testing
        env:
          DATA: /data/scratch/cifar-10
          NCCL_SHM_DISABLE: "1"
          LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
          TESTMON_CORE_PKGS: /__w/ColossalAI/ColossalAI/requirements/requirements.txt,/__w/ColossalAI/ColossalAI/requirements/requirements-test.txt
        run: |
          CURL_CA_BUNDLE="" PYTHONPATH=$PWD pytest -m "not largedist" --testmon --testmon-cov=. --durations=10 tests/

      - name: Store Testmon Cache
        env:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          mkdir -p "/github/home/testmon_cache/_pull/${PR_NUMBER}"
          cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* "/github/home/testmon_cache/_pull/${PR_NUMBER}/"

      - name: Collate artifact
        env:
          PR_NUMBER: ${{ github.event.number }}
          changedLibraryFiles: ${{ needs.detect.outputs.changedLibraryFiles }}
          anyLibraryFileChanged: ${{ needs.detect.outputs.anyLibraryFileChanged }}
          changedExtensionFiles: ${{ needs.detect.outputs.changedExtensionFiles }}
        run: |
          mkdir -p report
          echo "$PR_NUMBER" > ./report/pr_number

          # generate coverage.txt if any
          if [ "$anyLibraryFileChanged" == "true" ] && [ -e .coverage ]; then
            # Join the whitespace-separated file list with commas for --include.
            allFiles=""
            for file in $changedLibraryFiles; do
              if [ "$allFiles" == "" ]; then
                allFiles=$file
              else
                allFiles=$allFiles,$file
              fi
            done

            coverage report --data-file .coverage --include "$allFiles" > ./coverage.txt

            # The last line of the report ends with the total percentage.
            # [0-9] (not [1-9]) so that values such as 0%, 80% and 100% are
            # captured in full.
            covPercentage=$(tail -n 1 coverage.txt | grep -o '[0-9]*%$')
            # Drop the trailing percent sign.
            covNum=${covPercentage%\%}
            mv coverage.txt ./report
            echo "$covNum" > ./report/cov_number
          else
            echo "No coverage report is generated"
          fi

      # NOTE(review): actions/upload-artifact@v3 is an old major version;
      # consider upgrading after confirming consumers of the `report` artifact.
      - name: Upload test coverage artifact
        uses: actions/upload-artifact@v3
        with:
          name: report
          path: report/

  # When a PR is closed: if it was merged, promote its testmon data to the
  # base branch's cache; in every case remove the PR-specific cache directory.
  store_cache:
    name: Store testmon cache for PR
    if: |
      github.event_name == 'pull_request' &&
      github.event.action == 'closed' &&
      github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
    runs-on: [self-hosted, gpu]
    container:
      image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
      options: --rm
    timeout-minutes: 5
    defaults:
      run:
        shell: bash
    steps:
      - name: Store testmon cache if possible
        if: github.event.pull_request.merged == true
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BASE_REF: ${{ github.event.pull_request.base.ref }}
        run: |
          # A branch name may contain slashes; replace every one of them with
          # a space.
          BASE="${BASE_REF//\// }"
          if [ -d "/github/home/testmon_cache/_pull/${PR_NUMBER}" ] && [ -n "$(ls -A "/github/home/testmon_cache/_pull/${PR_NUMBER}")" ]; then
            # The base branch may not have a cache directory yet.
            mkdir -p "/github/home/testmon_cache/${BASE}"
            cp -p -r "/github/home/testmon_cache/_pull/${PR_NUMBER}"/.testmondata* "/github/home/testmon_cache/${BASE}/"
          fi

      - name: Remove testmon cache
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # `:?` aborts instead of deleting the whole _pull directory should
          # the PR number ever be empty.
          rm -rf "/github/home/testmon_cache/_pull/${PR_NUMBER:?PR number is required}"

  # When a branch is deleted from the main repository, drop its testmon cache.
  remove_cache:
    name: Remove testmon cache
    if: |
      github.event_name == 'delete' &&
      github.event.ref_type == 'branch' &&
      github.event.repository.full_name == 'hpcaitech/ColossalAI'
    runs-on: [self-hosted, gpu]
    container:
      image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
      options: --rm
    timeout-minutes: 5
    defaults:
      run:
        shell: bash
    steps:
      - name: Remove testmon cache
        env:
          # Passed via env instead of being interpolated into the script so
          # the ref name is never parsed as shell code.
          REF_NAME: ${{ github.event.ref }}
        run: |
          # A branch name may contain slashes; replace every one of them with
          # a space.
          BASE="${REF_NAME//\// }"
          # Never run with an empty name: that would wipe the cache root.
          if [ -n "${BASE}" ]; then
            rm -rf "/github/home/testmon_cache/${BASE}"
          fi