[Inference] Optimize request handler of llama (#5512)

* optimize request_handler

* fix coding style
Author: 傅剑寒
Date: 2024-03-26 16:37:14 +08:00
Committed by: GitHub
Parent: 6251d68dc9
Commit: e6496dd371
2 changed files with 9 additions and 7 deletions


@@ -298,8 +298,8 @@ class RequestHandler:
"""
# do logit processor
# NOTE: need to decide the granularity to process logits (sequence or batch)
config_dict = generation_config.to_dict()
for type in ["top_k", "top_p", "min_p"]:
config_dict = generation_config.to_dict()
if type in config_dict and config_dict[type] is not None:
logits = logit_processor(type, logits, config_dict[type])
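
For context, a minimal runnable sketch of the logit-processing loop after this change. This is not the actual ColossalAI RequestHandler: GenerationConfig and logit_processor below are simplified stand-ins for illustration. The point of the commit is that generation_config.to_dict() is now called once before the loop instead of once per iteration.

    # Sketch only; GenerationConfig and logit_processor are hypothetical stand-ins.
    from dataclasses import dataclass, asdict
    from typing import Optional

    import torch


    @dataclass
    class GenerationConfig:
        top_k: Optional[int] = 50
        top_p: Optional[float] = 0.9
        min_p: Optional[float] = None

        def to_dict(self):
            return asdict(self)


    def logit_processor(kind: str, logits: torch.Tensor, value) -> torch.Tensor:
        # Placeholder: a real processor would filter logits according to the
        # top_k / top_p / min_p rule; here the logits are returned unchanged.
        return logits


    def process_logits(logits: torch.Tensor, generation_config: GenerationConfig) -> torch.Tensor:
        # The optimization in this commit: build the config dict once,
        # rather than calling generation_config.to_dict() on every iteration.
        config_dict = generation_config.to_dict()
        # The loop variable name `type` mirrors the original code,
        # even though it shadows the Python builtin.
        for type in ["top_k", "top_p", "min_p"]:
            if type in config_dict and config_dict[type] is not None:
                logits = logit_processor(type, logits, config_dict[type])
        return logits


    if __name__ == "__main__":
        out = process_logits(torch.randn(1, 32000), GenerationConfig())
        print(out.shape)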