[npu] use extension for op builder (#5172)

* update extension

* update cpu adam

* update is

* add doc for cpu adam

* update kernel

* update commit

* update flash

* update memory efficient

* update flash attn

* update flash attention loader

* update api

* fix

* update doc

* update example time limit

* reverse change

* fix doc

* remove useless kernel

* fix

* not use warning

* update

* update
Author: Xuanlei Zhao
Date: 2024-01-08 11:39:16 +08:00
Committed by: GitHub
Parent: d6df19bae7
Commit: dd2c28a323

35 changed files with 1067 additions and 274 deletions


@@ -90,9 +90,9 @@ class FusedAdamKernel(AdamKernel):
 class CPUAdamKernel(AdamKernel):
     def __init__(self, lr: float, beta1: float, beta2: float, eps: float, weight_decay: float, use_adamw: bool) -> None:
         super().__init__(lr, beta1, beta2, eps, weight_decay, use_adamw)
-        from colossalai.kernel.op_builder import CPUAdamBuilder
+        from colossalai.kernel import CPUAdamLoader
-        cpu_optim = CPUAdamBuilder().load()
+        cpu_optim = CPUAdamLoader().load()
         self.cpu_adam_op = cpu_optim.CPUAdamOptimizer(lr, beta1, beta2, eps, weight_decay, use_adamw)
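
For orientation, here is a minimal usage sketch of the loader-based API that replaces CPUAdamBuilder in this hunk. The hyperparameter values below are placeholders, not defaults taken from the PR; the CPUAdamOptimizer argument order follows the call shown in the diff.

    # Sketch only: placeholder hyperparameters, argument order as in the diff above.
    from colossalai.kernel import CPUAdamLoader

    cpu_optim = CPUAdamLoader().load()  # loads the compiled CPU Adam extension
    cpu_adam_op = cpu_optim.CPUAdamOptimizer(
        1e-3,   # lr
        0.9,    # beta1
        0.999,  # beta2
        1e-8,   # eps
        0.0,    # weight_decay
        True,   # use_adamw
    )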


@@ -4,13 +4,11 @@ import pytest
 import torch
 from einops import rearrange
-from colossalai.kernel.cuda_native.mha.flash_attn_2 import HAS_FLASH_ATTN
-from colossalai.kernel.cuda_native.mha.mem_eff_attn import HAS_MEM_EFF_ATTN
+from colossalai.kernel.extensions.flash_attention import HAS_FLASH_ATTN, HAS_MEM_EFF_ATTN
 from colossalai.testing import clear_cache_before_run, parameterize
 if HAS_MEM_EFF_ATTN or HAS_FLASH_ATTN:
-    from colossalai.kernel.cuda_native import ColoAttention
-    from colossalai.kernel.cuda_native.scaled_softmax import AttnMaskType
+    from colossalai.kernel import AttnMaskType, ColoAttention
 DTYPE = [torch.float16, torch.bfloat16, torch.float32]
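
As a quick orientation for downstream code, the sketch below mirrors the consolidated import pattern from this hunk inside a guarded test skeleton. The test name and the skipif reason are illustrative additions, not part of the PR.

    import pytest

    # Consolidated import paths introduced by this PR (see the hunk above).
    from colossalai.kernel.extensions.flash_attention import HAS_FLASH_ATTN, HAS_MEM_EFF_ATTN

    if HAS_MEM_EFF_ATTN or HAS_FLASH_ATTN:
        from colossalai.kernel import AttnMaskType, ColoAttention


    @pytest.mark.skipif(
        not (HAS_MEM_EFF_ATTN or HAS_FLASH_ATTN),
        reason="requires a flash or memory-efficient attention backend",
    )
    def test_attention_symbols_resolve():
        # Illustrative smoke test: the re-exported names should resolve from
        # the top-level colossalai.kernel namespace after this change.
        assert ColoAttention is not None
        assert AttnMaskType is not None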