From 32c46e146e96d9f6ee949b9f64b84a789e5479ea Mon Sep 17 00:00:00 2001
From: Frank Lee <somerlee.9@gmail.com>
Date: Thu, 12 Jan 2023 10:57:02 +0800
Subject: [PATCH] [workflow] automated bdist wheel build (#2459)

* [workflow] automated bdist wheel build

* polish workflow

* polish readme

* polish readme
---
 .bdist.json                              | 24 ++++++++
 .github/workflows/README.md              | 32 ++++++++---
 .github/workflows/auto_release_bdist.yml | 70 ++++++++++++++++++++++++
 3 files changed, 118 insertions(+), 8 deletions(-)
 create mode 100644 .bdist.json
 create mode 100644 .github/workflows/auto_release_bdist.yml

diff --git a/.bdist.json b/.bdist.json
new file mode 100644
index 000000000..8693bca48
--- /dev/null
+++ b/.bdist.json
@@ -0,0 +1,24 @@
+{
+  "build": [
+    {
+      "torch_version": "1.11.0",
+      "cuda_image": "hpcaitech/cuda-conda:10.2"
+    },
+    {
+      "torch_version": "1.11.0",
+      "cuda_image": "hpcaitech/cuda-conda:11.3"
+    },
+    {
+      "torch_version": "1.12.1",
+      "cuda_image": "hpcaitech/cuda-conda:10.2"
+    },
+    {
+      "torch_version": "1.12.1",
+      "cuda_image": "hpcaitech/cuda-conda:11.3"
+    },
+    {
+      "torch_version": "1.12.1",
+      "cuda_image": "hpcaitech/cuda-conda:11.6"
+    }
+  ]
+}
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index bc1f8504d..cda6a3139 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -48,14 +48,15 @@ In the section below, we will dive into the details of different workflows avail
 
 ### Release
 
-| Workflow Name               | File name                       | Description                                                                                                                                           |
-| --------------------------- | ------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `Draft GitHub Release Post` | `draft_github_release_post.yml` | Compose a GitHub release post draft based on the commit history.  Triggered when the change of `version.txt` is merged.                               |
-| `Release to PyPI`           | `release_pypi.yml`              | Build and release the wheel to PyPI.  Triggered when the change of `version.txt` is merged.                                                           |
-| `Release Nightly to PyPI`   | `release_nightly.yml`           | Build and release the nightly wheel to PyPI as `colossalai-nightly`. Automatically executed every Sunday.                                             |
-| `Release Docker`            | `release_docker.yml`            | Build and release the Docker image to DockerHub. Triggered when the change of `version.txt` is merged.                                                |
-| `Release bdist wheel`       | `release_bdist.yml`             | Build binary wheels with pre-built PyTorch extensions. Manually dispatched. See more details in the next section.                                     |
-| `Auto Compatibility Test`   | `auto_compatibility_test.yml`   | Check Colossal-AI's compatiblity against the PyTorch and CUDA version specified in `.compatibility`. Triggered when `version.txt` is changed in a PR. |
+| Workflow Name               | File name                       | Description                                                                                                                                                 |
+| --------------------------- | ------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `Draft GitHub Release Post` | `draft_github_release_post.yml` | Compose a GitHub release post draft based on the commit history.  Triggered when the change of `version.txt` is merged.                                     |
+| `Release to PyPI`           | `release_pypi.yml`              | Build and release the wheel to PyPI.  Triggered when the change of `version.txt` is merged.                                                                 |
+| `Release Nightly to PyPI`   | `release_nightly.yml`           | Build and release the nightly wheel to PyPI as `colossalai-nightly`. Automatically executed every Sunday.                                                   |
+| `Release Docker`            | `release_docker.yml`            | Build and release the Docker image to DockerHub. Triggered when the change of `version.txt` is merged.                                                      |
+| `Release bdist wheel`       | `release_bdist.yml`             | Build binary wheels with pre-built PyTorch extensions. Manually dispatched. See more details in the next section.                                           |
+| `Auto Release bdist wheel`  | `auto_release_bdist.yml`        | Build binary wheels with pre-built PyTorch extensions. Triggered when a change to `version.txt` is merged. Build specifications are stored in `.bdist.json` |
+| `Auto Compatibility Test`   | `auto_compatibility_test.yml`   | Check Colossal-AI's compatibility against the PyTorch and CUDA versions specified in `.compatibility`. Triggered when `version.txt` is changed in a PR.     |
 
 ### Manual Dispatch
 
@@ -105,6 +106,21 @@ This section lists the files used to configure the workflow.
 
 This `.compatibility` file is to tell GitHub Actions which PyTorch and CUDA versions to test against. Each line in the file is in the format `${torch-version}-${cuda-version}`, which is a tag for Docker image. Thus, this tag must be present in the [docker registry](https://hub.docker.com/r/pytorch/conda-cuda) so as to perform the test.
 
+2. `.bdist.json`
+
+This file lists the PyTorch/CUDA combinations for which pre-built wheels are built and published. If a new wheel needs to be built with AOT compilation of the PyTorch extensions, add a new entry following the JSON schema below, where `torch_version` is the PyTorch version to build against and `cuda_image` is the Docker image the build runs in.
+
+```json
+{
+  "build": [
+    {
+      "torch_version": "",
+      "cuda_image": ""
+    }
+  ]
+}
+```
+
 ## Progress Log
 
 - [x] unit testing
diff --git a/.github/workflows/auto_release_bdist.yml b/.github/workflows/auto_release_bdist.yml
new file mode 100644
index 000000000..56a3036f8
--- /dev/null
+++ b/.github/workflows/auto_release_bdist.yml
@@ -0,0 +1,70 @@
+name: Auto Release bdist wheel  # builds one wheel per entry in .bdist.json and uploads the results
+
+on:
+  workflow_dispatch:  # allows the workflow to be started manually
+  pull_request:  # the job-level `merged == true` check narrows this to merged PRs
+    paths:
+      - 'version.txt'  # only PRs that modify version.txt
+    types:
+      - closed  # `closed` fires for merged and unmerged PRs alike
+
+jobs:
+  matrix_preparation:  # publishes the content of .bdist.json as the `matrix` output consumed by the build job
+    name: Prepare Container List
+    if: ( github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true ) && github.repository == 'hpcaitech/ColossalAI'
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v3
+      - id: set-matrix  # step id referenced by the job-level `outputs.matrix` expression
+        run: |
+          bdist=$(tr '\n' ' ' < .bdist.json)  # flatten to one line; a missing file now fails the step instead of yielding an empty matrix
+          echo "matrix=${bdist}" >> "$GITHUB_OUTPUT"
+
+  build:  # one job per entry of the `build` list in .bdist.json
+    name: Release bdist wheels
+    needs: matrix_preparation
+    runs-on: [self-hosted, gpu]
+    strategy:
+      fail-fast: false  # keep building the remaining combinations when one of them fails
+      matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
+    container:
+      image: ${{ matrix.build.cuda_image }}
+      options: --gpus all --rm
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0  # fetch the full history instead of a shallow clone
+      # stage the build scripts, link the caches, configure conda/git and fetch cub (needed for CUDA 10.2)
+      - name: Copy scripts
+        run: |
+          cp -r ./.github/workflows/scripts/* ./
+
+          # link the cache directories to current path
+          ln -s /github/home/conda_pkgs ./conda_pkgs
+          ln -s /github/home/pip_wheels ./pip_wheels
+
+          # set the conda package path (printf, unlike echo, expands "\n" the same way in every sh)
+          printf 'pkgs_dirs:\n  - %s/conda_pkgs\n' "$PWD" > ~/.condarc
+
+          # set safe directory
+          git config --global --add safe.directory /__w/ColossalAI/ColossalAI
+
+          # get cub package for cuda 10.2
+          wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
+          unzip 1.8.0.zip
+      - name: Build bdist wheel
+        run: |
+          pip install beautifulsoup4 requests packaging
+          python ./build_colossalai_wheel.py --torch_version "$TORCH_VERSIONS"
+        env:
+          TORCH_VERSIONS: ${{ matrix.build.torch_version }}
+      - name: 🚀 Deploy
+        uses: garygrossgarten/github-action-scp@release  # NOTE(review): third-party action on a mutable ref receives secrets; consider pinning to a commit SHA
+        with:
+          local: all_dist  # presumably the output directory of build_colossalai_wheel.py; verify against the script
+          remote: ${{ secrets.PRIVATE_PYPI_DIR }}
+          host: ${{ secrets.PRIVATE_PYPI_HOST }}
+          username: ${{ secrets.PRIVATE_PYPI_USER }}
+          password: ${{ secrets.PRIVATE_PYPI_PASSWD }}