# Builds Colossal-AI and runs its unit tests for pull requests.
#
# The workflow only does work when the PR is not a draft, targets `main` of
# hpcaitech/ColossalAI, and carries the 'Run Build and Test' label (see the
# `if:` on the `detect` job; `build` depends on `detect`).
name: Build on PR

on:
  pull_request:
    types: [synchronize, labeled]

jobs:
  # Works out which files changed relative to the merge base with origin/main
  # and exposes the result as job outputs for the `build` job.
  detect:
    name: Detect file change
    if: |
      github.event.pull_request.draft == false &&
      github.base_ref == 'main' &&
      github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' &&
      contains( github.event.pull_request.labels.*.name, 'Run Build and Test')
    outputs:
      # NOTE: the 'Extenison' spelling is kept as-is because the name is
      # referenced again by the `build` job below.
      changedExtenisonFiles: ${{ steps.find-extension-change.outputs.all_changed_files }}
      anyExtensionFileChanged: ${{ steps.find-extension-change.outputs.any_changed }}
      changedLibraryFiles: ${{ steps.find-lib-change.outputs.all_changed_files }}
      anyLibraryFileChanged: ${{ steps.find-lib-change.outputs.any_changed }}
    runs-on: ubuntu-latest
    steps:
      # Full history is fetched so that `git merge-base` below can find the
      # common ancestor with origin/main.
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Locate base commit
        id: locate-base-sha
        run: |
          curBranch=$(git rev-parse --abbrev-ref HEAD)
          commonCommit=$(git merge-base origin/main "$curBranch")
          echo "$commonCommit"
          echo "baseSHA=$commonCommit" >> "$GITHUB_OUTPUT"

      - name: Find the changed extension-related files
        id: find-extension-change
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
          files: |
            op_builder/**
            colossalai/kernel/**
            setup.py

      - name: Find the changed library-related files
        id: find-lib-change
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
          files: |
            **/*.py
            **/*.h
            **/*.cpp
            **/*.cu
            **/*.txt

      # The file lists are passed through environment variables instead of
      # being interpolated into the script text: file names come from the
      # pull request and must not be able to inject shell commands.
      - name: List changed files
        env:
          CHANGED_EXTENSION_FILES: ${{ steps.find-extension-change.outputs.all_changed_files }}
          CHANGED_LIBRARY_FILES: ${{ steps.find-lib-change.outputs.all_changed_files }}
        run: |
          for file in $CHANGED_EXTENSION_FILES; do
            echo "$file was changed"
          done
          for file in $CHANGED_LIBRARY_FILES; do
            echo "$file was changed"
          done

  # Installs TensorNVMe and Colossal-AI inside the CUDA container, runs the
  # test suite with testmon, and uploads a small `report` artifact.
  build:
    name: Build and Test Colossal-AI
    needs: detect
    runs-on: [self-hosted, gpu]
    container:
      image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
      options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
    timeout-minutes: 60
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout TensorNVMe
        uses: actions/checkout@v2
        with:
          repository: hpcaitech/TensorNVMe
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
          path: TensorNVMe

      # Written as an `if` rather than `[ ... ] && cp ...`: with the latter,
      # an empty or missing cache leaves the script with a non-zero exit
      # status and fails the step.
      - name: Restore TensorNVMe Cache
        run: |
          if [ -n "$(ls -A /github/home/tensornvme_cache/ 2>/dev/null)" ]; then
            cp -p -r /github/home/tensornvme_cache/* /__w/ColossalAI/ColossalAI/TensorNVMe
          fi

      - name: Install TensorNVMe
        run: |
          cd TensorNVMe
          conda install cmake
          pip install -r requirements.txt
          pip install -v .

      - name: Store TensorNVMe Cache
        run: |
          cd TensorNVMe
          # make sure the cache directory exists so `build` lands inside it
          mkdir -p /github/home/tensornvme_cache
          cp -p -r ./build /github/home/tensornvme_cache/

      - name: Checkout Colossal-AI
        uses: actions/checkout@v2
        with:
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}

      # The cached build is only reused when no extension-related file changed.
      - name: Restore Colossal-AI Cache
        if: needs.detect.outputs.anyExtensionFileChanged != 'true'
        run: |
          # -p flag is required to preserve the file timestamp to avoid ninja rebuild
          if [ -n "$(ls -A /github/home/cuda_ext_cache/ 2>/dev/null)" ]; then
            cp -p -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
          fi

      - name: Install Colossal-AI
        if: needs.detect.outputs.anyLibraryFileChanged == 'true'
        run: |
          CUDA_EXT=1 pip install -v -e .
          pip install -r requirements/requirements-test.txt

      - name: Store Colossal-AI Cache
        run: |
          # make sure the cache directory exists so `build` lands inside it
          mkdir -p /github/home/cuda_ext_cache
          # -p flag is required to preserve the file timestamp to avoid ninja rebuild
          cp -p -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/

      - name: Restore Testmon Cache
        run: |
          if [ -n "$(ls -A /github/home/testmon_cache 2>/dev/null)" ]; then
            cp -p -r /github/home/testmon_cache/.testmondata /__w/ColossalAI/ColossalAI/
          fi

      - name: Execute Unit Testing
        if: needs.detect.outputs.anyLibraryFileChanged == 'true'
        run: |
          CURL_CA_BUNDLE="" PYTHONPATH=$PWD pytest --testmon --testmon-cov=. tests/
        env:
          DATA: /data/scratch/cifar-10
          NCCL_SHM_DISABLE: "1"
          LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64

      - name: Store Testmon Cache
        run: |
          mkdir -p /github/home/testmon_cache
          cp -p -r /__w/ColossalAI/ColossalAI/.testmondata /github/home/testmon_cache/

      # Assembles the `report` directory (PR number plus, when available,
      # coverage text and coverage percentage) that the next step uploads.
      - name: Collate artifact
        env:
          PR_NUMBER: ${{ github.event.number }}
          changedLibraryFiles: ${{ needs.detect.outputs.changedLibraryFiles }}
          anyLibraryFileChanged: ${{ needs.detect.outputs.anyLibraryFileChanged }}
          changedExtenisonFiles: ${{ needs.detect.outputs.changedExtenisonFiles }}
        run: |
          mkdir report
          echo "$PR_NUMBER" > ./report/pr_number

          # generate coverage.xml if any
          if [ "$anyLibraryFileChanged" == "true" ] && [ -e .coverage ]; then
            # join the changed files into the comma-separated list expected by --include
            allFiles=""
            for file in $changedLibraryFiles; do
              if [ "$allFiles" == "" ]; then
                allFiles=$file
              else
                allFiles=$allFiles,$file
              fi
            done

            coverage report --data-file .coverage --include "$allFiles" > ./coverage.txt

            # [0-9] (not [1-9]) so that percentages containing a zero,
            # e.g. 80% or 100%, are captured in full
            covPercentage=$(tail -n 1 coverage.txt | grep -o '[0-9]*%$')
            # drop the trailing '%'
            covNum=${covPercentage::-1}
            mv coverage.txt ./report
            echo "$covNum" > ./report/cov_number
          else
            echo "No coverage report is generated"
          fi

      - name: Upload test coverage artifact
        uses: actions/upload-artifact@v3
        with:
          name: report
          path: report/