diff --git a/tests/test_infer/test_config_and_struct.py b/tests/test_infer/test_config_and_struct.py
index e0736518c..47d3839e4 100755
--- a/tests/test_infer/test_config_and_struct.py
+++ b/tests/test_infer/test_config_and_struct.py
@@ -63,6 +63,9 @@ def check_config_and_inference():
     batch.add_seqs([sequence])
     batch.add_seqs([sequence2, sequence3])
 
+    # add duplicated sequence to test that it will not be counted twice
+    batch.add_seqs([sequence])
+
     assert batch.is_empty == False
     assert batch.get_batch_size() == 3
     batch.update_batch_tokens([1, 2, 3])
diff --git a/tests/test_infer_ops/triton/kernel_utils.py b/tests/test_infer/test_ops/triton/kernel_utils.py
similarity index 100%
rename from tests/test_infer_ops/triton/kernel_utils.py
rename to tests/test_infer/test_ops/triton/kernel_utils.py
diff --git a/tests/test_infer_ops/triton/test_context_attn_unpad.py b/tests/test_infer/test_ops/triton/test_context_attn_unpad.py
similarity index 98%
rename from tests/test_infer_ops/triton/test_context_attn_unpad.py
rename to tests/test_infer/test_ops/triton/test_context_attn_unpad.py
index 0a3ede555..b529e76d1 100644
--- a/tests/test_infer_ops/triton/test_context_attn_unpad.py
+++ b/tests/test_infer/test_ops/triton/test_context_attn_unpad.py
@@ -6,7 +6,7 @@ from transformers.modeling_attn_mask_utils import AttentionMaskConverter
 from colossalai.inference.modeling.layers.attention import PagedAttention
 from colossalai.kernel.triton import context_attention_unpadded
 from colossalai.utils import get_current_device
-from tests.test_infer_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, torch_attn_ref
+from tests.test_infer.test_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, torch_attn_ref
 
 try:
     import triton  # noqa
diff --git a/tests/test_infer_ops/triton/test_decoding_attn.py b/tests/test_infer/test_ops/triton/test_decoding_attn.py
similarity index 99%
rename from tests/test_infer_ops/triton/test_decoding_attn.py
rename to tests/test_infer/test_ops/triton/test_decoding_attn.py
index 5eac026bb..4b9b63f7d 100644
--- a/tests/test_infer_ops/triton/test_decoding_attn.py
+++ b/tests/test_infer/test_ops/triton/test_decoding_attn.py
@@ -4,7 +4,7 @@ from packaging import version
 
 from colossalai.kernel.triton import flash_decoding_attention
 from colossalai.utils import get_current_device
-from tests.test_infer_ops.triton.kernel_utils import (
+from tests.test_infer.test_ops.triton.kernel_utils import (
     convert_kv_unpad_to_padded,
     generate_caches_and_block_tables_v2,
     prepare_padding_mask,
diff --git a/tests/test_infer_ops/triton/test_fused_rotary_embedding.py b/tests/test_infer/test_ops/triton/test_fused_rotary_embedding.py
similarity index 100%
rename from tests/test_infer_ops/triton/test_fused_rotary_embedding.py
rename to tests/test_infer/test_ops/triton/test_fused_rotary_embedding.py
diff --git a/tests/test_infer_ops/triton/test_kvcache_copy.py b/tests/test_infer/test_ops/triton/test_kvcache_copy.py
similarity index 97%
rename from tests/test_infer_ops/triton/test_kvcache_copy.py
rename to tests/test_infer/test_ops/triton/test_kvcache_copy.py
index 3b0a0f765..5612f2bd9 100644
--- a/tests/test_infer_ops/triton/test_kvcache_copy.py
+++ b/tests/test_infer/test_ops/triton/test_kvcache_copy.py
@@ -5,7 +5,7 @@ from packaging import version
 from colossalai.inference.modeling.layers.attention import copy_to_cache
 from colossalai.kernel.triton import copy_kv_to_blocked_cache
 from colossalai.utils import get_current_device
-from tests.test_infer_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, mock_alloc_single_token
+from tests.test_infer.test_ops.triton.kernel_utils import generate_caches_and_block_tables_v2, mock_alloc_single_token
 
 try:
     import triton  # noqa
diff --git a/tests/test_infer_ops/triton/test_rmsnorm_triton.py b/tests/test_infer/test_ops/triton/test_rmsnorm_triton.py
similarity index 100%
rename from tests/test_infer_ops/triton/test_rmsnorm_triton.py
rename to tests/test_infer/test_ops/triton/test_rmsnorm_triton.py
diff --git a/tests/test_infer_ops/triton/test_rotary_embdding_unpad.py b/tests/test_infer/test_ops/triton/test_rotary_embdding_unpad.py
similarity index 98%
rename from tests/test_infer_ops/triton/test_rotary_embdding_unpad.py
rename to tests/test_infer/test_ops/triton/test_rotary_embdding_unpad.py
index 529c9fb2f..6a8dc85f0 100644
--- a/tests/test_infer_ops/triton/test_rotary_embdding_unpad.py
+++ b/tests/test_infer/test_ops/triton/test_rotary_embdding_unpad.py
@@ -4,7 +4,7 @@ from packaging import version
 from transformers.models.llama.modeling_llama import LlamaRotaryEmbedding, apply_rotary_pos_emb
 
 from colossalai.kernel.triton import rotary_embedding
-from tests.test_infer_ops.triton.kernel_utils import mock_alloc_block_table_and_kvcache_v2
+from tests.test_infer.test_ops.triton.kernel_utils import mock_alloc_block_table_and_kvcache_v2
 
 try:
     import triton  # noqa
diff --git a/tests/test_infer_ops/triton/test_xine_copy.py b/tests/test_infer/test_ops/triton/test_xine_copy.py
similarity index 100%
rename from tests/test_infer_ops/triton/test_xine_copy.py
rename to tests/test_infer/test_ops/triton/test_xine_copy.py