From 4eafe0c8141c120229be3ddce9c5591c1535348a Mon Sep 17 00:00:00 2001
From: Runyu Lu
Date: Thu, 21 Mar 2024 11:28:42 +0800
Subject: [PATCH] [fix] unused option

---
 colossalai/inference/modeling/models/nopadding_llama.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/colossalai/inference/modeling/models/nopadding_llama.py b/colossalai/inference/modeling/models/nopadding_llama.py
index b8e8c61dd..ccb2e837d 100644
--- a/colossalai/inference/modeling/models/nopadding_llama.py
+++ b/colossalai/inference/modeling/models/nopadding_llama.py
@@ -199,8 +199,7 @@ def llama_rmsnorm_forward(
     residual: torch.Tensor = None,
     use_cuda_kernel: bool = True,
 ):
-    # if use_cuda_kernel:
-    if False:
+    if use_cuda_kernel:
         if residual is not None:
             inference_ops.fused_add_rms_layernorm(hidden_states, residual, self.weight.data, self.variance_epsilon)
             return hidden_states, residual
@@ -340,8 +339,7 @@ class NopadLlamaAttention(LlamaAttention):
                 sm_scale=sm_scale,
             )
         else:
-            # if use_cuda_kernel:
-            if False:
+            if use_cuda_kernel:
                 inference_ops.rotary_embedding_and_cache_copy(
                     query_states,
                     key_states,
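
Note: the patch restores the `use_cuda_kernel` gate that a leftover debugging
`if False:` had disabled. Below is a minimal, self-contained sketch of the
dispatch pattern being restored, assuming a hypothetical extension module
`my_cuda_ext.fused_rms_layernorm` as a stand-in for the real `inference_ops`
binding; it is an illustration, not ColossalAI's implementation.

    import torch

    try:
        # Hypothetical fused extension; the name is an assumption.
        from my_cuda_ext import fused_rms_layernorm
        HAS_FUSED_KERNEL = True
    except ImportError:
        HAS_FUSED_KERNEL = False

    def rmsnorm_forward(hidden_states, weight, eps, use_cuda_kernel=True):
        if use_cuda_kernel and HAS_FUSED_KERNEL and hidden_states.is_cuda:
            # Fast path: one fused kernel normalizes and scales in a
            # single launch, avoiding intermediate tensors.
            return fused_rms_layernorm(hidden_states, weight, eps)
        # Fallback: reference RMSNorm in plain PyTorch, accumulated in
        # float32 for numerical stability.
        variance = hidden_states.float().pow(2).mean(-1, keepdim=True)
        normed = hidden_states * torch.rsqrt(variance + eps)
        return weight * normed.to(hidden_states.dtype)

With `if False:` in place the flag was dead code and every call silently took
the slow fallback branch, which is why the commit is tagged "unused option".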