diff --git a/.github/workflows/README.md b/.github/workflows/README.md index f40f4cc86..3fad7e36f 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -43,10 +43,18 @@ I will provide the details of each workflow below. | Workflow Name | File name | Description | | ---------------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files. It will run all the unit tests in the repository with 4 GPUs. | +| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files and a branch is created/deleted. It will run all the unit tests in the repository with 4 GPUs. | | `Build on Schedule` | `build_on_schedule.yml` | This workflow will run the unit tests everyday with 8 GPUs. The result is sent to Lark. | | `Report test coverage` | `report_test_coverage.yml` | This PR will put up a comment to report the test coverage results when `Build` is done. | +To reduce the average time of the unit test on PR, `Build on PR` workflow manages testmon cache. + +1. When creating a new branch, it copies `cache/main/.testmondata*` to `cache//`. +2. When creating a new PR or change the base branch of a PR, it copies `cache//.testmondata*` to `cache/_pull//`. +3. When running unit tests for each PR, it restores testmon cache from `cache/_pull//`. After the test, it stores the cache back to `cache/_pull//`. +4. When a PR is closed, if it's merged, it copies `cache/_pull//.testmondata*` to `cache//`. Otherwise, it just removes `cache/_pull/`. +5. When a branch is deleted, it removes `cache/`. + ### Example Test | Workflow Name | File name | Description | diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml index a5a17d176..b5f293107 100644 --- a/.github/workflows/build_on_pr.yml +++ b/.github/workflows/build_on_pr.yml @@ -2,7 +2,7 @@ name: Build on PR on: pull_request: - types: [synchronize, opened, reopened] + types: [synchronize, opened, reopened, ready_for_review, closed, edited] branches: - "main" - "develop" @@ -18,11 +18,63 @@ on: - "!tests/**.md" # ignore doc change - "pytest.ini" # test config change - "setup.py" # install command change + create: + delete: jobs: + prepare_cache: + name: Prepare testmon cache + if: | + github.event_name == 'create' && + github.event.ref_type == 'branch' && + github.event.repository.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 + options: --rm + timeout-minutes: 5 + defaults: + run: + shell: bash + steps: + - name: Copy testmon cache + run: | # branch name may contain slash, we need to replace it with space + export REF_BRANCH=$(echo ${{ github.event.ref }} | sed "s/\// /") + if [ -d /github/home/testmon_cache/${MAIN_BRANCH} ]; then + [ ! -z "$(ls -A /github/home/testmon_cache/${MAIN_BRANCH})" ] && cp -p -r /github/home/testmon_cache/${MAIN_BRANCH} "/github/home/testmon_cache/${REF_BRANCH}" + fi + env: + MAIN_BRANCH: ${{ github.event.master_branch }} + + prepare_cache_for_pr: + name: Prepare testmon cache for PR + if: | + github.event_name == 'pull_request' && + (github.event.action == 'opened' || github.event.action == 'reopened' || (github.event.action == 'edited' && github.event.changes.base != null)) && + github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 + options: --rm + timeout-minutes: 5 + defaults: + run: + shell: bash + steps: + - name: Copy testmon cache + run: | # branch name may contain slash, we need to replace it with space + export BASE=$(echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /") + if [ -d "/github/home/testmon_cache/${BASE}" ]; then + [ ! -z "$(ls -A "/github/home/testmon_cache/${BASE}")" ] && mkdir /github/home/testmon_cache/_pull && cp -p -r "/github/home/testmon_cache/${BASE}" /github/home/testmon_cache/_pull/${PR_NUMBER} + fi + env: + PR_NUMBER: ${{ github.event.pull_request.head.ref }} + detect: name: Detect file change if: | + github.event_name == 'pull_request' && + (github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'ready_for_review') && github.event.pull_request.draft == false && github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' outputs: @@ -135,9 +187,11 @@ jobs: - name: Restore Testmon Cache run: | - if [ -d /github/home/testmon_cache ]; then - [ ! -z "$(ls -A /github/home/testmon_cache)" ] && cp -p -r /github/home/testmon_cache/.testmondata* /__w/ColossalAI/ColossalAI/ + if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then + [ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* /__w/ColossalAI/ColossalAI/ fi + env: + PR_NUMBER: ${{ github.event.number }} - name: Execute Unit Testing run: | @@ -149,8 +203,10 @@ jobs: - name: Store Testmon Cache run: | - [ -d /github/home/testmon_cache ] || mkdir /github/home/testmon_cache - cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* /github/home/testmon_cache/ + mkdir -p /github/home/testmon_cache/_pull/${PR_NUMBER} + cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* /github/home/testmon_cache/_pull/${PR_NUMBER}/ + env: + PR_NUMBER: ${{ github.event.number }} - name: Collate artifact env: @@ -188,3 +244,55 @@ jobs: with: name: report path: report/ + + store_cache: + name: Store testmon cache for PR + if: | + github.event_name == 'pull_request' && + github.event.action == 'closed' && + github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 + options: --rm + timeout-minutes: 5 + defaults: + run: + shell: bash + steps: + - name: Store testmon cache if possible + if: github.event.pull_request.merged == true + run: | # branch name may contain slash, we need to replace it with space + export BASE=$(echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /") + if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then + [ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* "/github/home/testmon_cache/${BASE}/" + fi + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + + - name: Remove testmon cache + if: github.event.pull_request.merged != true + run: | + rm -rf /github/home/testmon_cache/_pull/${PR_NUMBER} + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + + remove_cache: + name: Remove testmon cache + if: | + github.event_name == 'delete' && + github.event.ref_type == 'branch' && + github.event.repository.full_name == 'hpcaitech/ColossalAI' + runs-on: [self-hosted, gpu] + container: + image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 + options: --rm + timeout-minutes: 5 + defaults: + run: + shell: bash + steps: + - name: Remove testmon cache + run: | # branch name may contain slash, we need to replace it with space + export BASE=$(echo ${{ github.event.ref }} | sed "s/\// /") + rm -rf "/github/home/testmon_cache/${BASE}"