Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-01 09:07:51 +00:00)
[nfc] fix typo colossalai/cli fx kernel (#3847)
* fix typo colossalai/autochunk auto_parallel amp
* fix typo colossalai/auto_parallel nn utils etc.
* fix typo colossalai/auto_parallel autochunk fx/passes etc.
* fix typo docs/
* change placememt_policy to placement_policy in docs/ and examples/
* fix typo colossalai/ applications/
* fix typo colossalai/cli fx kernel
@@ -138,7 +138,7 @@ if HAS_MEM_EFF_ATTN:
         elif attn_mask_type == AttnMaskType.causal:    # gpt style
             attn_bias = LowerTriangularMask()
 
-        if bias is not None:    # alibi / relative position emebedding
+        if bias is not None:    # alibi / relative position embedding
             assert allow_alibi, "flash attention with bias is not supported in this system."
             assert attn_mask_type == AttnMaskType.causal, \
                 "attention with bias is only supported for causal attention so far."
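For context, the hunk above sits on the memory-efficient attention path: a causal (GPT-style) mask is expressed as a LowerTriangularMask attention bias, and an explicit alibi / relative-position bias is only accepted together with causal masking. The following is a minimal sketch of that call pattern, assuming xformers is installed and a CUDA device is available; the helper name and tensor shapes are illustrative, not ColossalAI's own wrapper API.

# Minimal sketch: causal memory-efficient attention via xformers (assumption:
# the installed xformers version exposes memory_efficient_attention and
# LowerTriangularMask under xformers.ops, as the diff context suggests).
import torch
from xformers.ops import memory_efficient_attention, LowerTriangularMask

def causal_mem_eff_attention(q, k, v, dropout_p=0.0):
    """q, k, v: (batch, seq_len, num_heads, head_dim) half-precision CUDA tensors."""
    # LowerTriangularMask() is the causal bias from the hunk above; an additive
    # alibi / relative-position bias would be passed as attn_bias instead and,
    # per the assert in the diff, is only supported for causal attention.
    return memory_efficient_attention(q, k, v, attn_bias=LowerTriangularMask(), p=dropout_p)

if torch.cuda.is_available():
    q = k = v = torch.randn(2, 128, 8, 64, device="cuda", dtype=torch.float16)
    out = causal_mem_eff_attention(q, k, v)    # same shape as q: (2, 128, 8, 64)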
@@ -43,7 +43,7 @@ class Config:
     attn_prob_dropout_ratio: float    # attention score dropout ratio
     hidden_dropout_ratio: float    # dropout ration before residual
     norm_first: bool    # norm_first
-    fp16: bool    # fp16 presion
+    fp16: bool    # fp16 precision
 
 
 class MultiHeadAttention1DFunc(Function):
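The Config touched above is a plain container for the fused multi-head attention kernel's hyperparameters. Below is a minimal sketch of such a dataclass, using only the field names visible in the diff; the real class in ColossalAI carries additional fields (hidden size, head count, etc.) and the example values here are made up.

# Sketch of a kernel-config dataclass matching the fields in the hunk above.
from dataclasses import dataclass

@dataclass
class Config:
    attn_prob_dropout_ratio: float    # attention score dropout ratio
    hidden_dropout_ratio: float       # dropout ratio applied before the residual add
    norm_first: bool                  # pre-LayerNorm if True, post-LayerNorm if False
    fp16: bool                        # run the kernel in fp16 precision

# Hypothetical values, purely for illustration.
cfg = Config(attn_prob_dropout_ratio=0.1, hidden_dropout_ratio=0.1, norm_first=True, fp16=True)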
@@ -43,7 +43,7 @@ def warmup_jit_fusion(batch_size: int,
                       seq_length: int = 512,
                       vocab_size: int = 32768,
                       dtype: torch.dtype = torch.float32):
-    """ Compilie JIT functions before the main training steps """
+    """ Compile JIT functions before the main training steps """
 
     embed = Embedding(vocab_size, hidden_size).to(get_current_device())
     linear_1 = Linear(hidden_size, hidden_size * 4, skip_bias_add=True).to(get_current_device())
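warmup_jit_fusion exists to trigger TorchScript compilation of the fused ops before the timed training steps, so the first real iteration is not distorted by compile latency. The sketch below illustrates the idea with a hypothetical bias+GeLU fusion only, using plain torch; ColossalAI's actual function also builds the Embedding and Linear layers shown in the diff and warms up several dropout/grad configurations.

# Illustrative JIT warm-up (assumption: a stand-in bias_gelu fusion, not
# ColossalAI's own kernels). Running a few forward/backward passes on dummy
# tensors makes TorchScript compile and specialize the fused function up front.
import torch

@torch.jit.script
def bias_gelu(bias: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    # tanh approximation of GeLU applied to x + bias
    y = x + bias
    return y * 0.5 * (1.0 + torch.tanh(0.79788456 * y * (1.0 + 0.044715 * y * y)))

def warmup_bias_gelu(hidden_size: int = 1024, seq_length: int = 512,
                     batch_size: int = 8, dtype: torch.dtype = torch.float32) -> None:
    bias = torch.zeros(hidden_size * 4, dtype=dtype, requires_grad=True)
    x = torch.randn(seq_length, batch_size, hidden_size * 4, dtype=dtype, requires_grad=True)
    for _ in range(5):    # a few forward/backward passes are enough to trigger compilation
        out = bias_gelu(bias, x)
        out.backward(torch.ones_like(out))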