Revert "[Inference] Adapt to Fused rotary (#5348)" (#5373)

This reverts commit 9f4ab2eb92.
This commit is contained in:
Frank Lee
2024-02-07 14:27:04 +08:00
committed by GitHub
parent 9f4ab2eb92
commit 8106ede07f
5 changed files with 22 additions and 161 deletions

View File

@@ -1,5 +1,4 @@
# Absolute path of the directory containing this script. Every expansion is
# quoted so a checkout path with spaces or glob characters stays one word.
ROOT=$(realpath "$(dirname "$0")")
printf '%s\n' "$ROOT"

# Benchmark driver that sits next to this script.
PY_SCRIPT="${ROOT}/benchmark_llama.py"

# Short name of the first GPU: take the first line of `nvidia-smi -L`, keep the
# 4th space-separated field, then drop everything from the first '-' onwards.
# NOTE(review): presumably yields a model name such as "A100" from a line like
# "GPU 0: NVIDIA A100-SXM4-80GB (...)" -- confirm against the local driver's
# output format.
GPU=$(nvidia-smi -L | head -n 1 | cut -d' ' -f4 | cut -d'-' -f1)

# First positional argument; empty when the script is run without arguments.
mode=$1