From f1bc2418c44c9ddb2b7b0551bd12fd2b83e4531b Mon Sep 17 00:00:00 2001
From: Frank Lee
Date: Thu, 5 Jan 2023 15:13:11 +0800
Subject: [PATCH] [setup] make cuda extension build optional (#2336)

* [setup] make cuda extension build optional

* polish code

* polish code

* polish code
---
 .github/workflows/build.yml       | 35 +++++++++++++++++++------------
 .github/workflows/build_gpu_8.yml |  3 +--
 setup.py                          | 13 +++++++++---
 3 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f9d43430c..6b3f9f9d7 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -5,13 +5,31 @@ on:
     types: [synchronize, labeled]
 
 jobs:
-  build:
-    name: Build and Test Colossal-AI
+  detect:
+    name: Detect kernel-related file change
     if: |
       github.event.pull_request.draft == false &&
       github.base_ref == 'main' &&
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' &&
       contains( github.event.pull_request.labels.*.name, 'Run Build and Test')
+    outputs:
+      changedFiles: ${{ steps.find-changed-files.outputs.changedFiles }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Find the changed files
+        id: find-changed-files
+        uses: tj-actions/changed-files@v34
+        with:
+          since_last_remote_commit: true
+          files: |
+            op_builder/**
+            colossalai/kernel/**
+            setup.py
+
+  build:
+    name: Build and Test Colossal-AI
+    needs: detect
     runs-on: [self-hosted, gpu]
     container:
       image: hpcaitech/pytorch-cuda:1.11.0-11.3.0
@@ -34,24 +52,15 @@ jobs:
       - uses: actions/checkout@v2
         with:
           ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
-
-      - name: Find the changed files
-        id: find-changed-files
-        uses: tj-actions/changed-files@v34
-        with:
-          files: |
-            op_builder/**
-            colossalai/kernel/**
-            setup.py
 
       - name: Restore cache
-        if: steps.find-changed-files.outputs.any_changed != 'true'
+        if: needs.detect.outputs.anyChanged == 'true'
         run: |
           [ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
 
       - name: Install Colossal-AI
         run: |
-          pip install -v -e .
+          CUDA_EXT=1 pip install -v -e .
           pip install -r requirements/requirements-test.txt
 
       - name: Unit Testing
diff --git a/.github/workflows/build_gpu_8.yml b/.github/workflows/build_gpu_8.yml
index 2a405d86f..be8337dd0 100644
--- a/.github/workflows/build_gpu_8.yml
+++ b/.github/workflows/build_gpu_8.yml
@@ -33,8 +33,7 @@ jobs:
       - name: Install Colossal-AI
         run: |
           [ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
-          pip install -r requirements/requirements.txt
-          pip install -v -e .
+          CUDA_EXT=1 pip install -v -e .
           cp -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
           pip install -r requirements/requirements-test.txt
       - name: Unit Testing
diff --git a/setup.py b/setup.py
index dda1061d7..62cea133f 100644
--- a/setup.py
+++ b/setup.py
@@ -18,15 +18,22 @@ try:
     TORCH_AVAILABLE = True
 except ImportError:
     TORCH_AVAILABLE = False
+    CUDA_HOME = None
 
 # ninja build does not work unless include_dirs are abs path
 this_dir = os.path.dirname(os.path.abspath(__file__))
 
-build_cuda_ext = True
+build_cuda_ext = False
 ext_modules = []
 
-if int(os.environ.get('NO_CUDA_EXT', '0')) == 1 or not TORCH_AVAILABLE:
-    build_cuda_ext = False
+if int(os.environ.get('CUDA_EXT', '0')) == 1:
+    if not TORCH_AVAILABLE:
+        raise ModuleNotFoundError("PyTorch is not found while CUDA_EXT=1. You need to install PyTorch first in order to build CUDA extensions")
+
+    if not CUDA_HOME:
+        raise RuntimeError("CUDA_HOME is not found while CUDA_EXT=1. You need to export CUDA_HOME environment variable or install CUDA Toolkit first in order to build CUDA extensions")
+
+    build_cuda_ext = True
 
 
 def check_cuda_torch_binary_vs_bare_metal(cuda_dir):
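
A minimal usage sketch of the behaviour this patch introduces, assuming a local checkout of the ColossalAI repository root; the /usr/local/cuda path is only an illustrative default and depends on where the CUDA Toolkit is actually installed:

    # new default: no CUDA extensions are compiled during installation
    pip install -v -e .

    # opt in to building the CUDA kernels; setup.py now raises
    # ModuleNotFoundError / RuntimeError if PyTorch or CUDA_HOME is missing
    CUDA_EXT=1 pip install -v -e .

    # if CUDA_HOME is not set, export it first (illustrative path)
    export CUDA_HOME=/usr/local/cuda

Both CI workflows install with CUDA_EXT=1, so the self-hosted GPU runners continue to compile the kernels even though local installs no longer do so by default.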