[Inference] Fix flash-attn import and add model test (#5794)

* Fix torch int32 dtype

Signed-off-by: char-1ee <xingjianli59@gmail.com>

* Fix flash-attn import

Signed-off-by: char-1ee <xingjianli59@gmail.com>

* Add generalized model test

Signed-off-by: char-1ee <xingjianli59@gmail.com>

* Remove exposed path to model

Signed-off-by: char-1ee <xingjianli59@gmail.com>

* Add default value for use_flash_attn

Signed-off-by: char-1ee <xingjianli59@gmail.com>

* Rename model test

Signed-off-by: char-1ee <xingjianli59@gmail.com>

---------

Signed-off-by: char-1ee <xingjianli59@gmail.com>
Author: Li Xingjian
Committed by: GitHub
Date: 2024-06-12 14:13:50 +08:00
Parent: aac941ef78
Commit: 8554585a5f
7 changed files with 171 additions and 8 deletions


@@ -152,6 +152,8 @@ def can_use_flash_attn2(dtype: torch.dtype) -> bool:
        return False
    try:
        from flash_attn import flash_attn_varlen_func  # noqa
        return True
    except ImportError:
        logger.warning(f"flash_attn2 has not been installed yet, we will use triton flash attn instead.")