[Fix/Inference] Fix format of input prompts and input model in inference engine (#5395)

* Fix bugs in inference_engine

* Fix bugs in engine.py

* Remove CUDA_VISIBLE_DEVICES

* Add request_ids to generate (see the sketch below)

* Fix bug in engine.py

* Add logger.debug for BatchBucket
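
The request_ids change lets callers tag each prompt with an id so generated outputs can be matched back to the requests that produced them. A minimal sketch of that pairing logic follows; the real engine lives in colossalai.inference, and the function name, signature, and defaults here are illustrative assumptions, not the actual API.

# Hypothetical sketch of the request_ids plumbing described in the
# commit message above. Names and defaults are assumptions.
from typing import Dict, List, Optional

def generate(prompts: List[str], request_ids: Optional[List[int]] = None) -> Dict[int, str]:
    """Pair each prompt with a caller-supplied request id.

    If the caller omits request_ids, fall back to sequential ids so
    outputs can still be matched back to their prompts.
    """
    if request_ids is None:
        request_ids = list(range(len(prompts)))
    if len(request_ids) != len(prompts):
        raise ValueError("request_ids and prompts must have the same length")
    # Placeholder decoding step; the real engine schedules prompts through
    # its batching logic (see the BatchBucket debug logging added here).
    return {rid: f"<output for {prompt!r}>" for rid, prompt in zip(request_ids, prompts)}

print(generate(["Hello", "World"], request_ids=[7, 8]))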
Author: yuehuayingxueluo
Date: 2024-02-23 10:51:35 +08:00
Committed by: GitHub
Parent: 2a718c8be8
Commit: bc1da87366
5 changed files with 27 additions and 8 deletions

@@ -27,7 +27,7 @@ CUDA_VISIBLE_DEVICES_set_n_least_memory_usage 1
 for input_len in 128 512 1024; do
   for output_len in 128 256; do
     for bsz in 16 32 64; do
-      python3 ${PY_SCRIPT} -m llama2-7b --tp_size 1 --pp_size 1 -b ${bsz} -s ${input_len} --output_len ${output_len} --mode ${mode} --model_path "/home/caidi/llama_model/" | tee logs/${input_len}_${output_len}_${mode}_${GPU}_${bsz}.txt
+      python3 ${PY_SCRIPT} -m llama2-7b --tp_size 1 --pp_size 1 -b ${bsz} -s ${input_len} --output_len ${output_len} --mode ${mode} --test_random_weight | tee logs/${input_len}_${output_len}_${mode}_${GPU}_${bsz}.txt
     done
   done
 done
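
The substantive change in this hunk replaces the hard-coded, machine-specific checkpoint path with a --test_random_weight flag, so the benchmark can run on hosts that have no local llama checkpoint, presumably by initializing the model with random weights (throughput measurements do not depend on the weight values). Below is a minimal sketch of how the benchmark script (${PY_SCRIPT}) might consume such a flag; only the flag name is taken from the diff, everything else is an assumption.

# Hypothetical argument handling for --test_random_weight; the flag name
# comes from the diff above, the rest is an illustrative assumption.
import argparse

def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="inference benchmark (sketch)")
    parser.add_argument("--model_path", type=str, default=None,
                        help="path to pretrained weights; optional when random weights are used")
    parser.add_argument("--test_random_weight", action="store_true",
                        help="skip loading a checkpoint and benchmark with randomly initialized weights")
    return parser

args = build_parser().parse_args([])  # empty argv just for demonstration
if args.test_random_weight or args.model_path is None:
    print("initializing model with random weights (no checkpoint needed)")
else:
    print(f"loading weights from {args.model_path}")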