mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-07 20:10:17 +00:00
[Inference] Finish Online Serving Test, add streaming output api, continuous batching test and example (#5432)
* finish online test and add examples * fix test_contionus_batching * fix some bugs * fix bash * fix * fix inference * finish revision * fix typos * revision
This commit is contained in:
@@ -58,7 +58,7 @@ async def generate(request: Request) -> Response:
|
||||
# Streaming case
|
||||
def stream_results():
|
||||
for request_output in results:
|
||||
ret = {"text": request_output}
|
||||
ret = {"text": request_output[len(prompt) :]}
|
||||
yield (json.dumps(ret) + "\0").encode("utf-8")
|
||||
|
||||
if stream:
|
||||
@@ -71,7 +71,7 @@ async def generate(request: Request) -> Response:
|
||||
# Abort the request if the client disconnects.
|
||||
engine.abort(request_id)
|
||||
return Response(status_code=499)
|
||||
final_output = request_output
|
||||
final_output = request_output[len(prompt) :]
|
||||
|
||||
assert final_output is not None
|
||||
ret = {"text": final_output}
|
||||
@@ -81,11 +81,15 @@ async def generate(request: Request) -> Response:
|
||||
@app.post("/v1/completion")
|
||||
async def create_completion(request: Request):
|
||||
request_dict = await request.json()
|
||||
stream = request_dict.pop("stream", False)
|
||||
generation_config = get_generation_config(request_dict)
|
||||
generator = await completion_serving.create_completion(request, generation_config)
|
||||
output = tokenizer.decode(generator.output_token_id)
|
||||
ret = {"request_id": generator.request_id, "text": output}
|
||||
return ret
|
||||
result = await completion_serving.create_completion(request, generation_config)
|
||||
|
||||
ret = {"request_id": result.request_id, "text": result.output}
|
||||
if stream:
|
||||
return StreamingResponse(content=json.dumps(ret) + "\0", media_type="text/event-stream")
|
||||
else:
|
||||
return JSONResponse(content=ret)
|
||||
|
||||
|
||||
def get_generation_config(request):
|
||||
|
Reference in New Issue
Block a user