[Fix] Fix & Update Inference Tests (compatibility w/ main)

commit 8754abae24
parent 56ed09aba5
Author: Yuanheng Zhao
Date:   2024-05-05 16:28:56 +00:00

30 changed files with 32 additions and 30 deletions
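All of the changed imports follow one pattern: the inference kernel tests were moved from tests/test_infer/test_ops/ to tests/test_infer/test_kernels/, and this commit updates the remaining references to the new package path so the tests import cleanly against main.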

@@ -4,7 +4,7 @@ from transformers.modeling_attn_mask_utils import AttentionMaskConverter
 from colossalai.inference.modeling.layers.attention import PagedAttention
 from colossalai.kernel.triton import context_attention_unpadded
 from colossalai.utils import get_current_device
-from tests.test_infer.test_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, torch_attn_ref
+from tests.test_infer.test_kernels.triton.kernel_utils import generate_caches_and_block_tables_v2, torch_attn_ref

 try:
     import triton  # noqa

@@ -2,14 +2,14 @@ import torch
 from colossalai.kernel.triton import flash_decoding_attention
 from colossalai.utils import get_current_device
-from tests.test_infer.test_ops.triton.kernel_utils import (
+from tests.test_infer.test_kernels.triton.kernel_utils import (
     convert_kv_unpad_to_padded,
     create_attention_mask,
     generate_caches_and_block_tables_v2,
     generate_caches_and_block_tables_v3,
     torch_attn_ref,
 )
-from tests.test_infer.test_ops.triton.test_decoding_attn import prepare_data
+from tests.test_infer.test_kernels.triton.test_decoding_attn import prepare_data

 try:
     import triton  # noqa

@@ -3,7 +3,7 @@ import torch
 from colossalai.kernel.kernel_loader import InferenceOpsLoader
 from colossalai.kernel.triton import flash_decoding_attention
 from colossalai.utils import get_current_device
-from tests.test_infer.test_ops.triton.kernel_utils import (
+from tests.test_infer.test_kernels.triton.kernel_utils import (
     generate_caches_and_block_tables_v2,
     generate_caches_and_block_tables_v3,
     generate_caches_and_block_tables_vllm,

@@ -2,7 +2,7 @@ import torch
 from colossalai.kernel.kernel_loader import InferenceOpsLoader
 from colossalai.kernel.triton import copy_kv_to_blocked_cache, decoding_fused_rotary_embedding, rotary_embedding
-from tests.test_infer.test_ops.triton.kernel_utils import (
+from tests.test_infer.test_kernels.triton.kernel_utils import (
     mock_alloc_block_table_and_kvcache_v2,
     mock_alloc_block_table_and_kvcache_v3,
     mock_alloc_single_token,

@@ -4,8 +4,8 @@ from colossalai.inference.modeling.layers.attention import copy_to_cache
 from colossalai.kernel.kernel_loader import InferenceOpsLoader
 from colossalai.kernel.triton import copy_kv_to_blocked_cache
 from colossalai.utils import get_current_device
-from tests.test_infer.test_ops.cuda.test_kv_cache_memcpy import prepare_data as prepare_data_new_kcache_layout
-from tests.test_infer.test_ops.triton.test_kvcache_copy import prepare_data
+from tests.test_infer.test_kernels.cuda.test_kv_cache_memcpy import prepare_data as prepare_data_new_kcache_layout
+from tests.test_infer.test_kernels.triton.test_kvcache_copy import prepare_data

 try:
     import triton  # noqa

@@ -1,7 +1,7 @@
 import torch
 from colossalai.kernel.triton import get_xine_cache
-from tests.test_infer.test_ops.triton.test_xine_copy import get_cos_sin
+from tests.test_infer.test_kernels.triton.test_xine_copy import get_cos_sin

 try:
     import triton  # noqa
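Every file in this commit gates its Triton kernels behind the same optional import, and each hunk truncates right after "import triton". For context, a minimal sketch of how such a guard is commonly completed; the HAS_TRITON flag name, the error message, and the skipif usage are assumptions for illustration and are not shown in these hunks:

try:
    import triton  # noqa
    HAS_TRITON = True  # assumed flag name: records that Triton kernels are usable
except ImportError:
    HAS_TRITON = False  # Triton missing: tests depending on it should be skipped
    print("please install triton")  # assumed message

A test module guarded this way would then typically skip its Triton cases with something like @pytest.mark.skipif(not HAS_TRITON, reason="requires triton"), so the suite still passes on machines without Triton installed.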