# Manually triggered workflow that runs the unit-test suite inside one
# container per (torch version, cuda version) combination supplied by the user.
name: Compatibility Test on Dispatch

on:
  workflow_dispatch:
    inputs:
      torch_version:
        type: string
        description: torch version, separated by comma
        required: true
      cuda_version:
        type: string
        description: cuda version, separated by comma
        required: true

jobs:
  # Expands the two comma-separated inputs into their cross product and
  # exposes it as a JSON matrix of the form {"container": ["<image>", ...]}.
  matrix_preparation:
    name: Prepare Container List
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - id: set-matrix
        env:
          # Inputs are passed through the environment rather than being
          # interpolated directly into the script text.
          TORCH_VERSIONS: ${{ inputs.torch_version }}
          CUDA_VERSIONS: ${{ inputs.cuda_version }}
        run: |
          # Split the unquoted expansions below on commas.
          IFS=','
          DOCKER_IMAGE=()

          # One JSON-quoted image tag per (torch, cuda) pair.
          for tv in $TORCH_VERSIONS
          do
              for cv in $CUDA_VERSIONS
              do
                  DOCKER_IMAGE+=("\"hpcaitech/pytorch-cuda:${tv}-${cv}\"")
              done
          done

          # Join the array with commas and wrap it in brackets -> JSON list.
          container=$( IFS=',' ; echo "${DOCKER_IMAGE[*]}" )
          container="[${container}]"
          echo "$container"

          # Publish the step output through the GITHUB_OUTPUT file; the
          # `::set-output` workflow command is deprecated. The path is quoted
          # because IFS is ',' here and an unquoted expansion would be split.
          echo "matrix={\"container\":${container}}" >> "$GITHUB_OUTPUT"

  # Runs the test suite once per container image produced above.
  build:
    name: Test for PyTorch Compatibility
    needs: matrix_preparation
    # Only run on the upstream repository.
    if: github.repository == 'hpcaitech/ColossalAI'
    runs-on: [self-hosted, 8-gpu]
    strategy:
      # Let the remaining combinations finish even if one of them fails.
      fail-fast: false
      matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
    container:
      image: ${{ matrix.container }}
      options: --gpus all --rm -v /dev/shm -v /data/scratch/:/data/scratch/
    timeout-minutes: 200
    steps:
      - name: Install dependencies
        run: |
          apt update && apt install -y cmake
          pip install -U pip setuptools==68.2.2 wheel --user

      # NOTE(review): checkout@v2 is an old major version; confirm the
      # container images support the runtime of a newer release before bumping.
      - uses: actions/checkout@v2
        with:
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}

      - name: Install Colossal-AI
        run: |
          BUILD_EXT=1 pip install -v -e .
          pip install --no-cache-dir -r requirements/requirements-test.txt

      - name: Install tensornvme
        run: |
          DISABLE_URING=1 pip install -v git+https://github.com/hpcaitech/TensorNVMe.git

      - name: Unit Testing
        run: |
          PYTHONPATH=$PWD pytest --durations=0 tests
        env:
          DATA: /data/scratch/cifar-10
          LD_LIBRARY_PATH: /github/home/.tensornvme/lib
          LLAMA_PATH: /data/scratch/llama-tiny
          MOE_TENSOR_PATH: /data/scratch/moe_tensors
          HF_ENDPOINT: https://hf-mirror.com