diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md
index 4aca7aeb0..cf5dbf245 100644
--- a/colossalai/inference/README.md
+++ b/colossalai/inference/README.md
@@ -59,16 +59,14 @@ dependencies
 pytorch= 1.13.1 (gpu)
 cuda>= 11.6
 transformers= 4.30.2
-triton==2.0.0.dev20221202
-# for install vllm, please use this branch to install https://github.com/tiandiao123/vllm/tree/setup_branch
-vllm
-# for install flash-attention, please use commit hash: 67ae6fd74b4bc99c36b2ce524cf139c35663793c
+triton
+# for install flash-attention
 flash-attention
 
 # install lightllm since we depend on lightllm triton kernels
 git clone https://github.com/ModelTC/lightllm
-git checkout 28c1267cfca536b7b4f28e921e03de735b003039
 cd lightllm
+git checkout 28c1267cfca536b7b4f28e921e03de735b003039
 pip3 install -e .
 
 # also, install xformers from source:
@@ -93,8 +91,8 @@ pip install -e .
 
 # install lightllm
 git clone https://github.com/ModelTC/lightllm
-git checkout 28c1267cfca536b7b4f28e921e03de735b003039
 cd lightllm
+git checkout 28c1267cfca536b7b4f28e921e03de735b003039
 pip3 install -e .
 
 # install xformers from source