diff --git a/colossalai/inference/core/request_handler.py b/colossalai/inference/core/request_handler.py index a83e5041d..dd8591e7f 100644 --- a/colossalai/inference/core/request_handler.py +++ b/colossalai/inference/core/request_handler.py @@ -198,8 +198,6 @@ class RequestHandler: if type in config_dict and config_dict[type] is not None: logits = logit_processor(type, logits, config_dict[type]) - torch.cuda.synchronize() - # calculate probs probs = torch.softmax(logits, dim=-1, dtype=torch.float) logprobs = torch.log_softmax(logits, dim=-1, dtype=torch.float)