[CI] Remove triton version pin to fix a compatibility bug; require torch >=2.2 (#6018)

* remove triton version

* remove torch 2.2

* remove torch 2.1

* debug

* remove 2.1 build tests

* require torch >=2.2

---------

Co-authored-by: Edenzzzz <wtan45@wisc.edu>
Author: Wenxuan Tan
Date: 2024-08-27 10:12:21 +08:00
Committed by: GitHub
Parent: 17904cb5bf
Commit: d383449fc4
13 changed files with 12 additions and 13 deletions
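
The "require torch >=2.2" change tightens the dependency floor. A minimal sketch of a runtime guard for that floor, assuming the packaging library is available (check_torch_version is an illustrative name, not ColossalAI's actual helper):

    import torch
    from packaging.version import Version

    def check_torch_version(minimum: str = "2.2.0") -> None:
        # Strip local build tags such as "+cu121" before comparing versions.
        installed = Version(torch.__version__.split("+")[0])
        if installed < Version(minimum):
            raise RuntimeError(f"torch>={minimum} is required, found {torch.__version__}")

    check_torch_version()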

@@ -89,7 +89,7 @@ jobs:
     if: needs.detect.outputs.anyLibraryFileChanged == 'true'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /dev/shm -v /data/scratch:/data/scratch
     timeout-minutes: 90
     defaults:

@@ -12,7 +12,7 @@ jobs:
     if: github.repository == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /dev/shm -v /data/scratch/:/data/scratch/
     timeout-minutes: 90
     steps:

@@ -56,7 +56,7 @@ jobs:
     needs: detect-changed-doc
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm
     timeout-minutes: 30
     defaults:

@@ -12,7 +12,7 @@ jobs:
     name: Test the changed Doc
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm
     timeout-minutes: 60
     steps:

@@ -45,7 +45,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 15
     steps:

@@ -90,7 +90,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 30
     concurrency:

@@ -34,7 +34,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 30
     steps:

@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data --shm-size=10.24gb
     timeout-minutes: 60
     defaults:

@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data
     timeout-minutes: 30
     defaults:

@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       volumes:
         - /data/scratch/test_data_colossalqa:/data/scratch/test_data_colossalqa
         - /data/scratch/llama-tiny:/data/scratch/llama-tiny
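
Every hunk above swaps the CI container from PyTorch 2.1.0 to 2.2.2 (both on CUDA 12.1.0), matching the new torch >=2.2 requirement. A quick sanity check that can be run inside the bumped image, assuming it ships Python with torch preinstalled as the tag naming implies:

    import torch

    print(torch.__version__)          # expected: 2.2.2 per the image tag
    print(torch.version.cuda)         # expected: 12.1 per the image tag
    print(torch.cuda.is_available())  # True on the self-hosted GPU runners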