[Inference] Fix API server, test and example (#5712)

* fix api server
* fix generation config
* fix api server
* fix comments
* fix infer hanging bug
* resolve comments, change backend to free port
2026-01-05 23:54:53 +00:00 · 2024-05-15 15:47:31 +08:00
parent 74c47921fa
commit f47f2fbb24
5 changed files with 73 additions and 32 deletions
--- a/examples/inference/client/run_locust.sh
+++ b/examples/inference/client/run_locust.sh
@@ -6,8 +6,9 @@
 model_path=${1:-"lmsys/vicuna-7b-v1.3"}
 chat_template="{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}"
 echo "Model Path: $model_path"
+echo "Chat Template: ${chat_template}"
 echo "Starting server..."
-python -m colossalai.inference.server.api_server --model $model_path --chat-template $chat_template &
+python -m colossalai.inference.server.api_server --model "$model_path" --chat-template "${chat_template}" &
 SERVER_PID=$!

 # waiting time
@@ -17,9 +18,9 @@ sleep 60
 echo "Starting Locust..."
 echo "The test will automatically begin, you can turn to http://0.0.0.0:8089 for more information."
 echo "Test completion api first"
-locust -f locustfile.py -t 300 --tags online-generation --host http://127.0.0.1:8000 --autostart --users 100 --stop-timeout 10
+locust -f locustfile.py -t 300 --tags online-generation --host http://127.0.0.1:8000 --autostart --users 300 --stop-timeout 10
 echo "Test chat api"
-locust -f locustfile.py -t 300 --tags online-chat --host http://127.0.0.1:8000 --autostart --users 100 --stop-timeout 10
+locust -f locustfile.py -t 300 --tags online-chat --host http://127.0.0.1:8000 --autostart --users 300 --stop-timeout 10
 # kill Server
 echo "Stopping server..."
 kill $SERVER_PID