This commit is contained in:
flybird11111 2025-05-28 11:13:44 +08:00
parent d3c40b9de4
commit 4afff92138
25 changed files with 39 additions and 39 deletions

View File

@ -2,11 +2,11 @@
"build": [
{
"torch_command": "pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121",
"cuda_image": "hpcaitech/cuda-conda:12.1"
"cuda_image": "image-cloud.luchentech.com/hpcaitech/cuda-conda:12.1"
},
{
"torch_command": "pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu124",
"cuda_image": "hpcaitech/cuda-conda:12.4"
"cuda_image": "image-cloud.luchentech.com/hpcaitech/cuda-conda:12.4"
}
]
}

View File

@ -12,7 +12,7 @@ jobs:
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
options: --gpus all --rm -v /dev/shm -v /data/scratch/:/data/scratch/
timeout-minutes: 90
steps:

View File

@ -7,7 +7,7 @@ on:
jobs:
close-issues:
if: github.event.pull_request.draft == false && github.base_ref == 'main' && github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
permissions:
issues: write
pull-requests: write

View File

@ -15,7 +15,7 @@ on:
jobs:
matrix_preparation:
name: Prepare Container List
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
@ -31,7 +31,7 @@ jobs:
do
for cv in $CUDA_VERSIONS
do
DOCKER_IMAGE+=("\"hpcaitech/pytorch-cuda:${tv}-${cv}\"")
DOCKER_IMAGE+=("\"image-cloud.luchentech.com/hpcaitech/pytorch-cuda:${tv}-${cv}\"")
done
done
@ -44,7 +44,7 @@ jobs:
name: Test for PyTorch Compatibility
needs: matrix_preparation
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, 8-gpu]
runs-on: [self-hosted, ubuntu-latest]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}

View File

@ -9,7 +9,7 @@ on:
jobs:
matrix_preparation:
name: Prepare Container List
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
concurrency:
@ -23,7 +23,7 @@ jobs:
DOCKER_IMAGE=()
while read tag; do
DOCKER_IMAGE+=("\"hpcaitech/pytorch-cuda:${tag}\"")
DOCKER_IMAGE+=("\"image-cloud.luchentech.com/hpcaitech/pytorch-cuda:${tag}\"")
done <.compatibility
container=$( IFS=',' ; echo "${DOCKER_IMAGE[*]}" )
@ -35,7 +35,7 @@ jobs:
name: Test for PyTorch Compatibility
needs: matrix_preparation
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, 8-gpu]
runs-on: [self-hosted, ubuntu-latest]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}

View File

@ -9,7 +9,7 @@ on:
jobs:
matrix_preparation:
name: Prepare Container List
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
@ -20,7 +20,7 @@ jobs:
DOCKER_IMAGE=()
while read tag; do
DOCKER_IMAGE+=("\"hpcaitech/pytorch-cuda:${tag}\"")
DOCKER_IMAGE+=("\"image-cloud.luchentech.com/hpcaitech/pytorch-cuda:${tag}\"")
done <.compatibility
container=$( IFS=',' ; echo "${DOCKER_IMAGE[*]}" )
@ -32,7 +32,7 @@ jobs:
name: Test for PyTorch Compatibility
needs: matrix_preparation
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, 8-gpu]
runs-on: [self-hosted, ubuntu-latest]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}

View File

@ -11,7 +11,7 @@ jobs:
build-doc:
name: Trigger Documentation Build Workflow
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
steps:
- name: trigger workflow in ColossalAI-Documentation
run: |

View File

@ -15,7 +15,7 @@ jobs:
if: |
github.event.pull_request.draft == false &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-check-i18n
cancel-in-progress: true
@ -33,7 +33,7 @@ jobs:
if: |
github.event.pull_request.draft == false &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-check-doc
cancel-in-progress: true

View File

@ -15,7 +15,7 @@ jobs:
if: |
github.event.pull_request.draft == false &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
outputs:
any_changed: ${{ steps.changed-files.outputs.any_changed }}
changed_files: ${{ steps.changed-files.outputs.all_changed_files }}
@ -56,7 +56,7 @@ jobs:
needs: detect-changed-doc
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
options: --gpus all --rm
timeout-minutes: 30
defaults:

View File

@ -12,7 +12,7 @@ jobs:
name: Test the changed Doc
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
options: --gpus all --rm
timeout-minutes: 60
steps:

View File

@ -12,7 +12,7 @@ jobs:
release:
name: Draft Release Post
if: ( github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true ) && github.repository == 'hpcaitech/ColossalAI'
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
steps:
- uses: actions/checkout@v2
with:

View File

@ -14,7 +14,7 @@ jobs:
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
name: Check the examples user want
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
@ -45,7 +45,7 @@ jobs:
fail-fast: false
matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
timeout-minutes: 15
steps:

View File

@ -17,7 +17,7 @@ jobs:
if: |
github.event.pull_request.draft == false &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
outputs:
matrix: ${{ steps.setup-matrix.outputs.matrix }}
anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }}
@ -90,7 +90,7 @@ jobs:
fail-fast: false
matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
timeout-minutes: 30
concurrency:

View File

@ -10,7 +10,7 @@ jobs:
matrix_preparation:
if: github.repository == 'hpcaitech/ColossalAI'
name: Prepare matrix for weekly check
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
outputs:
matrix: ${{ steps.setup-matrix.outputs.matrix }}
steps:
@ -34,7 +34,7 @@ jobs:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
timeout-minutes: 30
steps:

View File

@ -46,7 +46,7 @@ jobs:
notify:
name: Notify Lark via webhook
needs: release
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
if: ${{ always() }}
steps:
- uses: actions/checkout@v2

View File

@ -9,7 +9,7 @@ jobs:
publish:
if: github.repository == 'hpcaitech/ColossalAI'
name: Build and publish Python 🐍 distributions 📦 to PyPI
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
timeout-minutes: 20
outputs:
status: ${{ steps.publish.outcome }}
@ -36,7 +36,7 @@ jobs:
notify:
name: Notify Lark via webhook
needs: publish
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
if: ${{ always() }} && github.repository == 'hpcaitech/ColossalAI'
steps:
- uses: actions/checkout@v2

View File

@ -12,7 +12,7 @@ jobs:
build-n-publish:
if: github.event_name == 'workflow_dispatch' || github.repository == 'hpcaitech/ColossalAI' && github.event.pull_request.merged == true && github.base_ref == 'main'
name: Build and publish Python 🐍 distributions 📦 to PyPI
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
@ -35,7 +35,7 @@ jobs:
notify:
name: Notify Lark via webhook
needs: build-n-publish
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
if: ${{ always() }}
steps:
- uses: actions/checkout@v2

View File

@ -9,7 +9,7 @@ jobs:
build-n-publish:
if: github.event_name == 'workflow_dispatch' || github.repository == 'hpcaitech/ColossalAI'
name: Build and publish Python 🐍 distributions 📦 to Test PyPI
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
timeout-minutes: 20
steps:
- uses: actions/checkout@v2

View File

@ -10,7 +10,7 @@ jobs:
generate-and-publish:
if: github.repository == 'hpcaitech/ColossalAI'
name: Generate leaderboard report and publish to Lark
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
timeout-minutes: 20
steps:
- uses: actions/checkout@v2

View File

@ -8,7 +8,7 @@ on:
jobs:
report-test-coverage:
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
if: ${{ github.event.workflow_run.conclusion == 'success' }}
steps:
- name: "Download artifact"

View File

@ -19,7 +19,7 @@ jobs:
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data --shm-size=10.24gb
timeout-minutes: 60
defaults:

View File

@ -19,7 +19,7 @@ jobs:
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data
timeout-minutes: 30
defaults:

View File

@ -19,7 +19,7 @@ jobs:
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
image: image-cloud.luchentech.com/hpcaitech/pytorch-cuda:2.2.2-12.1.0
volumes:
- /data/scratch/test_data_colossalqa:/data/scratch/test_data_colossalqa
- /data/scratch/llama-tiny:/data/scratch/llama-tiny

View File

@ -7,7 +7,7 @@ on:
jobs:
sync-submodule:
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
if: github.repository == 'hpcaitech/ColossalAI'
steps:
- name: Checkout

View File

@ -7,7 +7,7 @@ on:
jobs:
build:
runs-on: ubuntu-latest
runs-on: [self-hosted, ubuntu-latest]
steps:
- uses: usthe/issues-translate-action@v2.7
with: