diff --git a/colossalai/inference/modeling/models/llama.py b/colossalai/inference/modeling/models/llama.py
index 10b2134a3..1331cc021 100644
--- a/colossalai/inference/modeling/models/llama.py
+++ b/colossalai/inference/modeling/models/llama.py
@@ -159,7 +159,7 @@ def llama_attn_forward(

     _, _, _, block_size = k_cache.shape

-    # NOTE: context_attention_unpadded is unsed for testing accuracy and we can only use aligned inputs.
+    # NOTE: context_attention_unpadded is used for testing accuracy and we can only use aligned inputs.
     # The code below will be uncommented after the development of attention-related kernel is completed.
     if is_prompts:
         attn_output = context_attention_unpadded(
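
For context on the comment this diff corrects: the `is_prompts` branch is the prefill path, where attention runs over unpadded ("packed") sequences rather than a padded batch. The sketch below is a minimal plain-PyTorch reference of what an unpadded context-attention kernel of this kind computes; the function name `context_attention_unpadded_ref` and the `cu_seqlens` argument are illustrative assumptions for this sketch, not ColossalAI's actual kernel signature.

```python
import torch


def context_attention_unpadded_ref(
    q: torch.Tensor,           # (total_tokens, num_heads, head_dim): sequences packed back to back
    k: torch.Tensor,           # same packed layout as q
    v: torch.Tensor,           # same packed layout as q
    cu_seqlens: torch.Tensor,  # (batch + 1,) cumulative sequence lengths, e.g. [0, 5, 12]
) -> torch.Tensor:
    """Causal self-attention computed per sequence over unpadded (packed) inputs.

    Hypothetical reference implementation, not ColossalAI's kernel.
    """
    scale = q.size(-1) ** -0.5
    out = torch.empty_like(q)
    for i in range(cu_seqlens.numel() - 1):
        s, e = int(cu_seqlens[i]), int(cu_seqlens[i + 1])
        # Views of this sequence's tokens as (num_heads, seq_len, head_dim)
        qi, ki, vi = (t[s:e].transpose(0, 1) for t in (q, k, v))
        scores = torch.matmul(qi, ki.transpose(-1, -2)) * scale
        # Causal mask: token t attends only to tokens <= t
        mask = torch.ones(e - s, e - s, dtype=torch.bool, device=q.device).tril()
        scores = scores.masked_fill(~mask, float("-inf"))
        out[s:e] = torch.matmul(scores.softmax(dim=-1), vi).transpose(0, 1)
    return out


if __name__ == "__main__":
    # Two sequences of lengths 5 and 7 packed into one 12-token tensor.
    q = torch.randn(12, 8, 64)
    k = torch.randn(12, 8, 64)
    v = torch.randn(12, 8, 64)
    cu_seqlens = torch.tensor([0, 5, 12])
    print(context_attention_unpadded_ref(q, k, v, cu_seqlens).shape)  # torch.Size([12, 8, 64])
```

Packing sequences this way avoids wasted compute on padding tokens, which is why such kernels take cumulative sequence lengths instead of an attention mask; the "aligned inputs" caveat in the fixed comment reflects that the kernel under development still constrains how those packed inputs may be laid out.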