mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-03 01:55:12 +00:00
[Inference]Optimize generation process of inference engine (#5356)
* opt inference engine
* fix run_benchmark.sh
* fix generate in engine.py
* rollback test_inference_engine.py
This commit is contained in:
@@ -141,8 +141,7 @@ def benchmark_inference(args):
|
||||
with ctx:
|
||||
for _ in range(N_WARMUP_STEPS):
|
||||
if args.mode == "caiinference":
|
||||
- engine.add_request(prompts_token_ids=data)
- engine.generate(generation_config)
+ engine.generate(prompts_token_ids=data, generation_config=generation_config)
|
||||
else:
|
||||
engine.generate(data, generation_config=generation_config)
|
||||
if args.profile:
|
||||
@@ -156,8 +155,7 @@ def benchmark_inference(args):
|
||||
whole_end2end = time.perf_counter()
|
||||
if args.mode == "caiinference":
|
||||
for _ in range(args.batch_size // mbsz):
|
||||
- engine.add_request(prompts_token_ids=data)
- engine.generate(generation_config)
+ engine.generate(prompts_token_ids=data, generation_config=generation_config)
|
||||
else:
|
||||
for _ in range(args.batch_size // mbsz):
|
||||
engine.generate(data, generation_config=generation_config)
|
||||
|
Reference in New Issue
Block a user