Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-01 09:07:51 +00:00)
[nfc] fix typo colossalai/cli fx kernel (#3847)
* fix typo colossalai/autochunk auto_parallel amp
* fix typo colossalai/auto_parallel nn utils etc.
* fix typo colossalai/auto_parallel autochunk fx/passes etc.
* fix typo docs/
* change placememt_policy to placement_policy in docs/ and examples/
* fix typo colossalai/ applications/
* fix typo colossalai/cli fx kernel
@@ -138,7 +138,7 @@ if HAS_MEM_EFF_ATTN:
         elif attn_mask_type == AttnMaskType.causal:    # gpt style
             attn_bias = LowerTriangularMask()
 
-        if bias is not None:    # alibi / relative position emebedding
+        if bias is not None:    # alibi / relative position embedding
             assert allow_alibi, "flash attention with bias is not supported in this system."
             assert attn_mask_type == AttnMaskType.causal, \
                 "attention with bias is only supported for causal attention so far."
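For context, the hunk above sits on the memory-efficient attention path: a causal (GPT-style) mask is expressed as a LowerTriangularMask attention bias, and an explicit alibi / relative-position bias is only accepted together with causal masking. The following is a minimal sketch of that call pattern, assuming xformers is installed and a CUDA device is available; the helper name and tensor shapes are illustrative, not ColossalAI's own wrapper API.

# Minimal sketch: causal memory-efficient attention via xformers (assumption:
# the installed xformers version exposes memory_efficient_attention and
# LowerTriangularMask under xformers.ops, as the diff context suggests).
import torch
from xformers.ops import memory_efficient_attention, LowerTriangularMask

def causal_mem_eff_attention(q, k, v, dropout_p=0.0):
    """q, k, v: (batch, seq_len, num_heads, head_dim) half-precision CUDA tensors."""
    # LowerTriangularMask() is the causal bias from the hunk above; an additive
    # alibi / relative-position bias would be passed as attn_bias instead and,
    # per the assert in the diff, is only supported for causal attention.
    return memory_efficient_attention(q, k, v, attn_bias=LowerTriangularMask(), p=dropout_p)

if torch.cuda.is_available():
    q = k = v = torch.randn(2, 128, 8, 64, device="cuda", dtype=torch.float16)
    out = causal_mem_eff_attention(q, k, v)    # same shape as q: (2, 128, 8, 64)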
@@ -43,7 +43,7 @@ class Config:
     attn_prob_dropout_ratio: float    # attention score dropout ratio
     hidden_dropout_ratio: float    # dropout ration before residual
     norm_first: bool    # norm_first
-    fp16: bool    # fp16 presion
+    fp16: bool    # fp16 precision
 
 
 class MultiHeadAttention1DFunc(Function):
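The Config touched above is a plain container for the fused multi-head attention kernel's hyperparameters. Below is a minimal sketch of such a dataclass, using only the field names visible in the diff; the real class in ColossalAI carries additional fields (hidden size, head count, etc.) and the example values here are made up.

# Sketch of a kernel-config dataclass matching the fields in the hunk above.
from dataclasses import dataclass

@dataclass
class Config:
    attn_prob_dropout_ratio: float    # attention score dropout ratio
    hidden_dropout_ratio: float       # dropout ratio applied before the residual add
    norm_first: bool                  # pre-LayerNorm if True, post-LayerNorm if False
    fp16: bool                        # run the kernel in fp16 precision

# Hypothetical values, purely for illustration.
cfg = Config(attn_prob_dropout_ratio=0.1, hidden_dropout_ratio=0.1, norm_first=True, fp16=True)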
@@ -43,7 +43,7 @@ def warmup_jit_fusion(batch_size: int,
                       seq_length: int = 512,
                       vocab_size: int = 32768,
                       dtype: torch.dtype = torch.float32):
-    """ Compilie JIT functions before the main training steps """
+    """ Compile JIT functions before the main training steps """
 
     embed = Embedding(vocab_size, hidden_size).to(get_current_device())
     linear_1 = Linear(hidden_size, hidden_size * 4, skip_bias_add=True).to(get_current_device())
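warmup_jit_fusion exists to trigger TorchScript compilation of the fused ops before the timed training steps, so the first real iteration is not distorted by compile latency. The sketch below illustrates the idea with a hypothetical bias+GeLU fusion only, using plain torch; ColossalAI's actual function also builds the Embedding and Linear layers shown in the diff and warms up several dropout/grad configurations.

# Illustrative JIT warm-up (assumption: a stand-in bias_gelu fusion, not
# ColossalAI's own kernels). Running a few forward/backward passes on dummy
# tensors makes TorchScript compile and specialize the fused function up front.
import torch

@torch.jit.script
def bias_gelu(bias: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    # tanh approximation of GeLU applied to x + bias
    y = x + bias
    return y * 0.5 * (1.0 + torch.tanh(0.79788456 * y * (1.0 + 0.044715 * y * y)))

def warmup_bias_gelu(hidden_size: int = 1024, seq_length: int = 512,
                     batch_size: int = 8, dtype: torch.dtype = torch.float32) -> None:
    bias = torch.zeros(hidden_size * 4, dtype=dtype, requires_grad=True)
    x = torch.randn(seq_length, batch_size, hidden_size * 4, dtype=dtype, requires_grad=True)
    for _ in range(5):    # a few forward/backward passes are enough to trigger compilation
        out = bias_gelu(bias, x)
        out.backward(torch.ones_like(out))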