mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-01 17:17:05 +00:00
[Inference] Finish Online Serving Test, add streaming output api, continuous batching test and example (#5432)
* finish online test and add examples * fix test_contionus_batching * fix some bugs * fix bash * fix * fix inference * finish revision * fix typos * revision
This commit is contained in:
30
examples/inference/client/locustfile.py
Normal file
30
examples/inference/client/locustfile.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from locust import HttpUser, between, tag, task
|
||||
|
||||
|
||||
class QuickstartUser(HttpUser):
    """Simulated Locust user that exercises the inference server's HTTP API.

    Tasks are grouped by tag ("online-generation" / "offline-generation") so a
    run can be restricted to one group with Locust's ``--tags`` option. The
    four generation tasks are weighted 5; the model-listing task uses the bare
    ``@task`` decorator and belongs to both tag groups.

    Note that every request sends the ``stream`` flag as the *string*
    ``"True"`` / ``"False"``, not as a JSON boolean.
    """

    # Each simulated user pauses between 1 and 5 (seconds) between tasks.
    wait_time = between(1, 5)

    @tag("online-generation")
    @task(5)
    def completion(self):
        """POST a non-streaming prompt to ``/v1/completion``."""
        payload = {"prompt": "hello, who are you? ", "stream": "False"}
        self.client.post("/v1/completion", json=payload)

    @tag("online-generation")
    @task(5)
    def completion_streaming(self):
        """POST a streaming prompt to ``/v1/completion``."""
        payload = {"prompt": "hello, who are you? ", "stream": "True"}
        self.client.post("/v1/completion", json=payload)

    @tag("offline-generation")
    @task(5)
    def generate_stream(self):
        """POST a streaming prompt to ``/generate``."""
        payload = {"prompt": "Can you help me? ", "stream": "True"}
        self.client.post("/generate", json=payload)

    @tag("offline-generation")
    @task(5)
    def generate(self):
        """POST a non-streaming prompt to ``/generate``."""
        payload = {"prompt": "Can you help me? ", "stream": "False"}
        self.client.post("/generate", json=payload)

    @tag("online-generation", "offline-generation")
    @task
    def get_models(self):
        """GET the model listing from ``/v0/models``."""
        endpoint = "/v0/models"
        self.client.get(endpoint)
|
24
examples/inference/client/run_locust.sh
Normal file
24
examples/inference/client/run_locust.sh
Normal file
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash

# Launch the inference API server, load-test it with Locust, then stop the server.
#
# argument1: model_path (optional, defaults to "lmsys/vicuna-7b-v1.3")

# launch server
model_path=${1:-"lmsys/vicuna-7b-v1.3"}
echo "Model Path: $model_path"
echo "Starting server..."
# Quote the path so a model directory containing spaces is passed as one argument.
python -m colossalai.inference.server.api_server --model "$model_path" &
SERVER_PID=$!

# Stop the background server if it is still alive. Registered as an EXIT trap
# so the server is not leaked when the script is interrupted mid-test.
stop_server() {
    if kill -0 "$SERVER_PID" 2>/dev/null; then
        echo "Stopping server..."
        kill "$SERVER_PID"
    fi
}
trap stop_server EXIT
# Turn Ctrl-C / SIGTERM into a normal exit so the EXIT trap above runs.
trap 'exit 130' INT
trap 'exit 143' TERM

# waiting time: give the server a fixed 60 seconds to load the model
sleep 60

# Run Locust
echo "Starting Locust..."
echo "The test will automatically begin, you can turn to http://0.0.0.0:8089 for more information."
locust -f locustfile.py -t 300 --tags online-generation --host http://127.0.0.1:8000 --autostart --users 100 --stop-timeout 10

# kill Server
stop_server
# Cleanup already done; drop the trap so it does not run a second time on exit.
trap - EXIT

echo "Test and server shutdown completely"
|
Reference in New Issue
Block a user