[Inference] Finish online serving test, add streaming output API, continuous batching test, and example (#5432)

* finish online test and add examples

* fix test_continuous_batching

* fix some bugs

* fix bash

* fix

* fix inference

* finish revision

* fix typos

* revision
This commit is contained in:
Jianghai
2024-03-18 17:06:05 +08:00
committed by CjhHa1
parent 69cd7e069d
commit de378cd2ab
10 changed files with 214 additions and 94 deletions
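
The new /v1/completion route serves both streamed and non-streamed output, and the locustfile below exercises both paths. As a minimal sketch of a client for the streaming path, assuming the server returns plain text chunks over a chunked HTTP response (the diff does not show the server-side framing, so adjust the parsing if it uses SSE instead):

# Streaming-client sketch. Assumptions: host/port match the launch script
# below, and the response body is plain text chunks (not SSE frames).
import requests

def stream_completion(prompt: str, host: str = "http://127.0.0.1:8000") -> None:
    # "stream": True asks the server to emit tokens as they are generated.
    with requests.post(
        f"{host}/v1/completion",
        json={"prompt": prompt, "stream": True},
        stream=True,  # tell requests not to buffer the whole body
    ) as resp:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            if chunk:
                print(chunk, end="", flush=True)

if __name__ == "__main__":
    stream_completion("hello, who are you? ")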


@@ -0,0 +1,30 @@
# Locust load-test tasks for the ColossalAI inference API server.
from locust import HttpUser, between, tag, task


class QuickstartUser(HttpUser):
    wait_time = between(1, 5)

    # JSON booleans are used for "stream"; string values like "False" are
    # truthy on the server side and would enable streaming unintentionally.
    @tag("online-generation")
    @task(5)
    def completion(self):
        self.client.post("/v1/completion", json={"prompt": "hello, who are you? ", "stream": False})

    @tag("online-generation")
    @task(5)
    def completion_streaming(self):
        self.client.post("/v1/completion", json={"prompt": "hello, who are you? ", "stream": True})

    @tag("offline-generation")
    @task(5)
    def generate_stream(self):
        self.client.post("/generate", json={"prompt": "Can you help me? ", "stream": True})

    @tag("offline-generation")
    @task(5)
    def generate(self):
        self.client.post("/generate", json={"prompt": "Can you help me? ", "stream": False})

    @tag("online-generation", "offline-generation")
    @task
    def get_models(self):
        self.client.get("/v0/models")
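
The tags above let a run target just one group of endpoints. For example, to drive only the offline routes without the launch script, locust -f locustfile.py --tags offline-generation --host http://127.0.0.1:8000 --headless -u 10 -t 60s uses standard Locust flags; the user count and duration here are illustrative, not values from the diff.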


@@ -0,0 +1,24 @@
#!/bin/bash
# argument 1: model_path

# launch the server
model_path=${1:-"lmsys/vicuna-7b-v1.3"}
echo "Model Path: $model_path"
echo "Starting server..."
python -m colossalai.inference.server.api_server --model "$model_path" &
SERVER_PID=$!

# give the server time to load the model
sleep 60

# run Locust
echo "Starting Locust..."
echo "The test will begin automatically; visit http://0.0.0.0:8089 for more information."
locust -f locustfile.py -t 300 --tags online-generation --host http://127.0.0.1:8000 --autostart --users 100 --stop-timeout 10

# kill the server
echo "Stopping server..."
kill $SERVER_PID
echo "Test finished and server shut down."