mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-08-17 15:36:53 +00:00
* [release] update version
* [devops] update comp test
* [devops] update comp test debug
* [devops] debug comp test
* [devops] debug comp test
* [devops] debug comp test
* [devops] debug comp test
* [devops] debug comp test
69 lines
2.2 KiB
YAML
69 lines
2.2 KiB
YAML
name: Test Example on Schedule

on:
  # Runs every Sunday at 00:00 Singapore time, which is Saturday 16:00 UTC
  # (the cron expression below is written in UTC).
  schedule:
    - cron: '0 16 * * 6'
  # Also allow the workflow to be started manually.
  workflow_dispatch:
|
|
|
|
jobs:
  # Weekly check of all example files. This first job only finds the example
  # directories and publishes them as the matrix consumed by `weekly_check`.
  matrix_preparation:
    # Only run in the upstream repository, not in forks or mirrors.
    if: github.repository == 'hpcaitech/ColossalAI'
    name: Prepare matrix for weekly check
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.setup-matrix.outputs.matrix }}
    steps:
      - name: 📚 Checkout
        uses: actions/checkout@v3

      - name: setup matrix
        id: setup-matrix
        run: |
          # The script's stdout is embedded verbatim as the value of
          # "directory" and later parsed with fromJson, so it has to be a
          # JSON array literal.
          all_loc=$(python .github/workflows/scripts/example_checks/check_example_weekly.py)
          echo "Found the examples: $all_loc"
          echo "matrix={\"directory\":$all_loc}" >> "$GITHUB_OUTPUT"

  # Runs `test_ci.sh` once per example directory found by `matrix_preparation`.
  weekly_check:
    # Only run in the upstream repository, not in forks or mirrors.
    if: github.repository == 'hpcaitech/ColossalAI'
    name: Weekly check all examples
    needs: matrix_preparation
    runs-on: [self-hosted, gpu]
    strategy:
      # Keep testing the remaining examples when one of them fails.
      fail-fast: false
      matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
    container:
      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
      options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
    timeout-minutes: 30
    steps:
      - name: 📚 Checkout
        uses: actions/checkout@v3

      - name: Install Colossal-AI
        run: |
          BUILD_EXT=1 pip install -v -e .

      - name: Traverse all files
        # The matrix value is handed over through the environment instead of
        # being spliced into the script text, so the shell always sees it as
        # data (same approach as the "Notify Lark" step below).
        run: |
          echo "Testing ${EXAMPLE_DIR} now"
          cd "${PWD}/examples/${EXAMPLE_DIR}"
          bash test_ci.sh
        env:
          EXAMPLE_DIR: ${{ matrix.directory }}

      - name: Notify Lark
        id: message-preparation
        # Only send a notification when an earlier step of this job failed.
        if: ${{ failure() }}
        run: |
          url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
          msg="Example tests failed for $EXAMPLE_DIR, please visit $url for details"
          echo "$msg"
          python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u "$WEBHOOK_URL"
        env:
          SERVER_URL: ${{github.server_url }}
          REPO: ${{ github.repository }}
          RUN_ID: ${{ github.run_id }}
          WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
          EXAMPLE_DIR: ${{ matrix.directory }}