[Fix/Inference] Fix format of input prompts and input model in inference engine (#5395)

* Fix bugs in inference_engine * fix bugs in engine.py * rm CUDA_VISIBLE_DEVICES * add request_ids in generate * fix bug in engine.py * add logger.debug for BatchBucket
2025-09-02 01:28:31 +00:00 · 2024-02-23 10:51:35 +08:00
parent 2a718c8be8
commit bc1da87366
5 changed files with 27 additions and 8 deletions
--- a/examples/inference/run_benchmark.sh
+++ b/examples/inference/run_benchmark.sh
@@ -27,7 +27,7 @@ CUDA_VISIBLE_DEVICES_set_n_least_memory_usage 1
 for input_len in  128 512 1024; do
    for output_len in 128 256; do
        for bsz in 16 32 64; do
-            python3 ${PY_SCRIPT} -m llama2-7b --tp_size 1 --pp_size 1 -b ${bsz} -s ${input_len} --output_len ${output_len} --mode ${mode} --model_path "/home/caidi/llama_model/" | tee logs/${input_len}_${output_len}_${mode}_${GPU}_${bsz}.txt
+            python3 ${PY_SCRIPT} -m llama2-7b --tp_size 1 --pp_size 1 -b ${bsz} -s ${input_len} --output_len ${output_len} --mode ${mode} --test_random_weight | tee logs/${input_len}_${output_len}_${mode}_${GPU}_${bsz}.txt
        done
    done
 done