mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-10 21:40:02 +00:00
[Inference] Finish Online Serving Test, add streaming output api, continuous batching test and example (#5432)
* finish online test and add examples * fix test_continuous_batching * fix some bugs * fix bash * fix * fix inference * finish revision * fix typos * revision
This commit is contained in:
@@ -18,18 +18,17 @@ class CompletionServing:
|
||||
async def create_completion(self, request, generation_config):
|
||||
request_dict = await request.json()
|
||||
request_id = id_generator()
|
||||
|
||||
prompt = request_dict.pop("prompt")
|
||||
|
||||
# NOTE: this is not an intuitive way to pass the generation config to the engine
|
||||
self.engine.engine.generation_config = generation_config
|
||||
result_generator = self.engine.generate(request_id, prompt=prompt)
|
||||
|
||||
final_res = None
|
||||
async for res in result_generator:
|
||||
if await request.is_disconnected():
|
||||
# Abort the request if the client disconnects.
|
||||
await self.engine.abort(request_id)
|
||||
return {"error_msg": "Client disconnected"}
|
||||
final_res = res
|
||||
if await request.is_disconnected():
|
||||
# Abort the request if the client disconnects.
|
||||
await self.engine.abort(request_id)
|
||||
raise RuntimeError("Client disconnected")
|
||||
|
||||
final_res = await result_generator
|
||||
return final_res
|
||||
|
Reference in New Issue
Block a user