mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-04-27 03:21:47 +00:00
* [shardformer] support pipeline for deepseek v3 * [checkpointio] fix lora save * [devops] update ci env * [booster] optimize lora * fix test * fix test
83 lines
2.4 KiB
YAML
83 lines
2.4 KiB
YAML
# Manually dispatched workflow: the caller supplies the torch and CUDA
# versions to test against as comma-separated lists.
name: "Compatibility Test on Dispatch"

on:
  workflow_dispatch:
    inputs:
      # Comma-separated torch versions; each one is paired with every
      # CUDA version given below.
      torch_version:
        description: "torch version, separated by comma"
        required: true
        type: string
      # Comma-separated CUDA versions.
      cuda_version:
        description: "cuda version, separated by comma"
        required: true
        type: string

jobs:
  # Expands the comma-separated torch/CUDA inputs into the JSON matrix
  # ({"container": [...]}) consumed by the `build` job.
  matrix_preparation:
    name: Prepare Container List
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - id: set-matrix
        env:
          TORCH_VERSIONS: ${{ inputs.torch_version }}
          CUDA_VERSIONS: ${{ inputs.cuda_version }}
        run: |
          # Split on commas with IFS scoped to each `read`, so word
          # splitting in the rest of the script is left untouched.
          IFS=',' read -ra torch_versions <<< "$TORCH_VERSIONS"
          IFS=',' read -ra cuda_versions <<< "$CUDA_VERSIONS"

          # One quoted image tag per (torch, cuda) pair. Whitespace is
          # stripped and empty entries are skipped, so inputs such as
          # "1.13, 2.0" or a trailing comma still yield valid tags.
          images=()
          for tv in "${torch_versions[@]}"; do
            tv="${tv//[[:space:]]/}"
            [ -n "$tv" ] || continue
            for cv in "${cuda_versions[@]}"; do
              cv="${cv//[[:space:]]/}"
              [ -n "$cv" ] || continue
              images+=("\"hpcaitech/pytorch-cuda:${tv}-${cv}\"")
            done
          done

          # Fail here with a clear message rather than handing an empty
          # matrix to the dependent job.
          if [ "${#images[@]}" -eq 0 ]; then
            echo "::error::no container image could be derived from the torch_version/cuda_version inputs"
            exit 1
          fi

          # Join the quoted tags into a JSON array.
          container="[$(IFS=','; echo "${images[*]}")]"
          echo "$container"

          # Publish the step output through $GITHUB_OUTPUT; the
          # `::set-output` workflow command is deprecated.
          echo "matrix={\"container\":${container}}" >> "$GITHUB_OUTPUT"

  # Runs the unit tests once per container image produced above.
  build:
    name: Test for PyTorch Compatibility
    needs: matrix_preparation
    # Only run on the upstream repository, not on forks.
    if: github.repository == 'hpcaitech/ColossalAI'
    runs-on: [self-hosted, 8-gpu]
    strategy:
      # Let every image finish even if another one fails.
      fail-fast: false
      matrix: ${{ fromJson(needs.matrix_preparation.outputs.matrix) }}
    container:
      image: ${{ matrix.container }}
      options: --gpus all --rm -v /dev/shm -v /data/scratch/:/data/scratch/
    timeout-minutes: 200
    steps:
      - name: Install dependencies
        run: |
          # apt-get is the interface intended for scripts; plain `apt`
          # warns that its CLI is not stable.
          apt-get update && apt-get install -y cmake
          pip install -U pip setuptools==68.2.2 wheel --user

      # NOTE(review): checkout@v2 is an old major version; confirm the
      # container images can run a newer one before upgrading.
      - uses: actions/checkout@v2
        with:
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}

      - name: Install Colossal-AI
        run: |
          BUILD_EXT=1 pip install -v -e .
          pip install --no-cache-dir -r requirements/requirements-test.txt

      - name: Install tensornvme
        run: |
          DISABLE_URING=1 pip install -v git+https://github.com/hpcaitech/TensorNVMe.git

      - name: Unit Testing
        run: |
          PYTHONPATH=$PWD pytest --durations=0 tests
        env:
          # Paths below live under the /data/scratch volume mounted via
          # the container options.
          DATA: /data/scratch/cifar-10
          LD_LIBRARY_PATH: /github/home/.tensornvme/lib
          LLAMA_PATH: /data/scratch/llama-tiny
          MOE_TENSOR_PATH: /data/scratch/moe_tensors
          HF_ENDPOINT: https://hf-mirror.com