[Inference] Finish Online Serving Test, add streaming output api, continuous batching test and example (#5432)

* finish online test and add examples * fix test_continuous_batching * fix some bugs * fix bash * fix * fix inference * finish revision * fix typos * revision
2025-09-23 18:39:56 +00:00 · 2024-03-18 17:06:05 +08:00
parent 69cd7e069d
commit de378cd2ab
10 changed files with 214 additions and 94 deletions
--- a/examples/inference/client/run_locust.sh
+++ b/examples/inference/client/run_locust.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Load-test the ColossalAI inference API server with Locust.
+#
+# Usage: run_locust.sh [model_path]
+#   model_path  value passed to the server's --model option
+#               (default: lmsys/vicuna-7b-v1.3)
+#
+# Must be run from the directory containing locustfile.py.
+# Exits with Locust's exit status, or 1 if the server dies during start-up.
+
+model_path=${1:-"lmsys/vicuna-7b-v1.3"}
+
+# Launch the server in the background and remember its PID for shutdown.
+echo "Model Path: $model_path"
+echo "Starting server..."
+python -m colossalai.inference.server.api_server --model "$model_path" &
+SERVER_PID=$!
+
+# Terminate the background server and wait for it to exit.
+stop_server() {
+    echo "Stopping server..."
+    kill "$SERVER_PID" 2>/dev/null
+    wait "$SERVER_PID" 2>/dev/null
+}
+
+# Background jobs of a non-interactive shell are started with SIGINT ignored,
+# so without these traps an interrupted run could leave the server running.
+trap stop_server EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+# Fixed start-up grace period before load is applied.
+sleep 60
+
+# Do not run the load test against a server that already exited.
+if ! kill -0 "$SERVER_PID" 2>/dev/null; then
+    echo "Server exited during start-up; aborting." >&2
+    exit 1
+fi
+
+# Run Locust
+echo "Starting Locust..."
+echo "The test will automatically begin, you can turn to http://0.0.0.0:8089 for more information."
+locust -f locustfile.py -t 300 --tags online-generation --host http://127.0.0.1:8000 --autostart --users 100 --stop-timeout 10
+locust_status=$?
+
+# Normal shutdown: stop the server once, then disarm the EXIT trap.
+stop_server
+trap - EXIT
+
+echo "Test and server shutdown completely"
+exit "$locust_status"