diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md
index 4aca7aeb0..cf5dbf245 100644
--- a/colossalai/inference/README.md
+++ b/colossalai/inference/README.md
@@ -59,16 +59,14 @@ dependencies
 pytorch= 1.13.1 (gpu)
 cuda>= 11.6
 transformers= 4.30.2
-triton==2.0.0.dev20221202
-# for install vllm, please use this branch to install https://github.com/tiandiao123/vllm/tree/setup_branch
-vllm
-# for install flash-attention, please use commit hash: 67ae6fd74b4bc99c36b2ce524cf139c35663793c
+triton
+# for install flash-attention
 flash-attention
 
 # install lightllm since we depend on lightllm triton kernels
 git clone https://github.com/ModelTC/lightllm
-git checkout 28c1267cfca536b7b4f28e921e03de735b003039
 cd lightllm
+git checkout 28c1267cfca536b7b4f28e921e03de735b003039
 pip3 install -e .
 
 # also, install xformers from source:
@@ -93,8 +91,8 @@ pip install -e .
 
 # install lightllm
 git clone https://github.com/ModelTC/lightllm
-git checkout 28c1267cfca536b7b4f28e921e03de735b003039
 cd lightllm
+git checkout 28c1267cfca536b7b4f28e921e03de735b003039
 pip3 install -e .
 
 # install xformers from source