Merge branch 'upgrade-transformers' of github.com:flybird11111/ColossalAI into upgrade-transformers

2025-09-29 05:26:21 +00:00 · 2025-04-10 12:57:46 +08:00
parent e8a3d52381 964f9a7974
commit 5c56a7fd7b
1 changed file with 3 additions and 1 deletion
--- a/tests/test_infer/test_kernels/cuda/test_flash_decoding_attention.py
+++ b/tests/test_infer/test_kernels/cuda/test_flash_decoding_attention.py
@@ -11,6 +11,7 @@ from tests.test_infer.test_kernels.triton.test_context_attn_unpad import generat
 inference_ops = InferenceOpsLoader().load()
 from colossalai.testing import clear_cache_before_run
 from tests.test_infer.test_kernels.triton.kernel_utils import (
    convert_kv_unpad_to_padded,
    create_attention_mask,
@@ -18,7 +19,6 @@ from tests.test_infer.test_kernels.triton.kernel_utils import (
    generate_caches_and_block_tables_vllm,
    torch_attn_ref,
 )
 from colossalai.testing import clear_cache_before_run
 q_len = 1
 PARTITION_SIZE = 512
@@ -56,6 +56,7 @@ def numpy_allclose(x, y, rtol, atol):
    np.testing.assert_allclose(x_numpy, y_numpy, rtol=rtol, atol=atol)
@clear_cache_before_run()
@pytest.mark.parametrize("BATCH_SIZE", [1, 4, 7, 32])
@pytest.mark.parametrize("BLOCK_SIZE", [8, 16, 32])
@@ -197,6 +198,7 @@ except ImportError:
    HAS_VLLM = False
    print("The subsequent test requires vllm. Please refer to https://github.com/vllm-project/vllm")
@clear_cache_before_run()
@pytest.mark.skipif(not HAS_VLLM, reason="requires vllm")
@pytest.mark.parametrize("BATCH_SIZE", [1, 7, 32])